521 files changed, 18941 insertions, 12234 deletions
diff --git a/Documentation/gpu/amdgpu/amd-hardware-list-info.rst b/Documentation/gpu/amdgpu/amd-hardware-list-info.rst
new file mode 100644
index 000000000000..1786544fe7c1
--- /dev/null
+++ b/Documentation/gpu/amdgpu/amd-hardware-list-info.rst
@@ -0,0 +1,23 @@
+=================================================
+ AMD Hardware Components Information per Product
+=================================================
+
+On this page, you can find each AMD product name and the versions of the
+hardware components that are part of it.
+
+Accelerated Processing Units (APU) Info
+---------------------------------------
+
+.. csv-table::
+   :header-rows: 1
+   :widths: 3, 2, 2, 1, 1, 1, 1
+   :file: ./apu-asic-info-table.csv
+
+Discrete GPU Info
+-----------------
+
+.. csv-table::
+   :header-rows: 1
+   :widths: 3, 2, 2, 1, 1, 1
+   :file: ./dgpu-asic-info-table.csv
+
diff --git a/Documentation/gpu/amdgpu/amdgpu-glossary.rst b/Documentation/gpu/amdgpu/amdgpu-glossary.rst
index 1e9283e076ba..2040da593b14 100644
--- a/Documentation/gpu/amdgpu/amdgpu-glossary.rst
+++ b/Documentation/gpu/amdgpu/amdgpu-glossary.rst
@@ -12,18 +12,39 @@ we have a dedicated glossary for Display Core at
       The number of CUs that are active on the system. The number of active
       CUs may be less than SE * SH * CU depending on the board configuration.
 
+    BACO
+      Bus Alive, Chip Off
+
+    BOCO
+      Bus Off, Chip Off
+
     CE
       Constant Engine
 
+    CIK
+      Sea Islands
+
+    CB
+      Color Buffer
+
     CP
       Command Processor
 
     CPLIB
       Content Protection Library
 
+    CS
+      Command Submission
+
+    CSB
+      Clear State Indirect Buffer
+
     CU
       Compute Unit
 
+    DB
+      Depth Buffer
+
     DFS
       Digital Frequency Synthesizer
 
@@ -33,6 +54,9 @@ we have a dedicated glossary for Display Core at
     EOP
       End Of Pipe/Pipeline
 
+    FLR
+      Function Level Reset
+
     GART
       Graphics Address Remapping Table. This is the name we use for the
       GPUVM page table used by the GPU kernel driver. It remaps system resources
@@ -45,6 +69,12 @@ we have a dedicated glossary for Display Core at
     GC
       Graphics and Compute
 
+    GDS
+      Global Data Share
+
+    GE
+      Geometry Engine
+
     GMC
       Graphic Memory Controller
 
@@ -80,6 +110,9 @@ we have a dedicated glossary for Display Core at
     KCQ
       Kernel Compute Queue
 
+    KFD
+      Kernel Fusion Driver
+
     KGQ
       Kernel Graphics Queue
 
@@ -89,6 +122,9 @@ we have a dedicated glossary for Display Core at
     MC
       Memory Controller
 
+    MCBP
+      Mid Command Buffer Preemption
+
     ME
       MicroEngine (Graphics)
 
@@ -104,6 +140,9 @@ we have a dedicated glossary for Display Core at
     MQD
       Memory Queue Descriptor
 
+    PA
+      Primitive Assembler / Physical Address
+
     PFP
       Pre-Fetch Parser (Graphics)
 
@@ -113,24 +152,39 @@ we have a dedicated glossary for Display Core at
     PSP
       Platform Security Processor
 
+    RB
+      Render Backends. Sometimes also referred to as ROPs.
+
     RLC
       RunList Controller. This name is a remnant of past ages and doesn't have
       much meaning today. It's a group of general-purpose helper engines for
       the GFX block. It's involved in GFX power management and SR-IOV, among
       other things.
 
+    SC
+      Scan Converter
+
     SDMA
       System DMA
 
     SE
       Shader Engine
 
+    SGPR
+      Scalar General-Purpose Registers
+
     SH
       SHader array
 
+    SI
+      Southern Islands
+
     SMU/SMC
       System Management Unit / System Management Controller
 
+    SPI
+      Shader Processor Input
+
     SRLC
       Save/Restore List Control
 
@@ -143,12 +197,21 @@ we have a dedicated glossary for Display Core at
     SS
       Spread Spectrum
 
+    SX
+      Shader Export
+
     TA
       Trusted Application
 
+    TC
+      Texture Cache
+
     TOC
       Table of Contents
 
+    UMSCH
+      User Mode Scheduler
+
     UVD
       Unified Video Decoder
 
@@ -158,5 +221,17 @@ we have a dedicated glossary for Display Core at
     VCN
       Video Codec Next
 
+    VGPR
+      Vector General-Purpose Registers
+
+    VMID
+      Virtual Memory ID
+
     VPE
       Video Processing Engine
+
+    XCC
+      Accelerator Core Complex
+
+    XCP
+      Accelerator Core Partition
diff --git a/Documentation/gpu/amdgpu/apu-asic-info-table.csv b/Documentation/gpu/amdgpu/apu-asic-info-table.csv
index 5dd4b8762d19..1d50b539677f 100644
--- a/Documentation/gpu/amdgpu/apu-asic-info-table.csv
+++ b/Documentation/gpu/amdgpu/apu-asic-info-table.csv
@@ -13,3 +13,5 @@ Ryzen 7x20 series, Mendocino, 3.1.6, 10.3.7, 3.1.1, 5.2.7, 13.0.8
 Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
 Ryzen 8x40 series, Hawk Point, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
 Ryzen AI 300 series, Strix Point, 3.5.0, 11.5.0, 4.0.5, 6.1.0, 14.0.0
+Ryzen AI 350 series, Krackan Point, 3.5.0, 11.5.2, 4.0.5, 6.1.2, 14.0.4
+Ryzen AI Max 300 series, Strix Halo, 3.5.1, 11.5.1, 4.0.6, 6.1.1, 14.0.1
diff --git a/Documentation/gpu/amdgpu/debugfs.rst b/Documentation/gpu/amdgpu/debugfs.rst
new file mode 100644
index 000000000000..5150d0a95658
--- /dev/null
+++ b/Documentation/gpu/amdgpu/debugfs.rst
@@ -0,0 +1,210 @@
+==============
+AMDGPU DebugFS
+==============
+
+The amdgpu driver provides a number of debugfs files to aid in debugging
+issues in the driver. These are usually found in
+/sys/kernel/debug/dri/<num>.
+
+DebugFS Files
+=============
+
+amdgpu_benchmark
+----------------
+
+Run benchmarks using the DMA engine the driver uses for GPU memory paging.
+Write a number to the file to run the test. The results are written to the
+kernel log. VRAM is on device memory (dGPUs) or carve out (APUs) and GTT
+(Graphics Translation Tables) is system memory that is accessible by the GPU.
+The following tests are available:
+
+- 1: simple test, VRAM to GTT and GTT to VRAM
+- 2: simple test, VRAM to VRAM
+- 3: GTT to VRAM, buffer size sweep, powers of 2
+- 4: VRAM to GTT, buffer size sweep, powers of 2
+- 5: VRAM to VRAM, buffer size sweep, powers of 2
+- 6: GTT to VRAM, buffer size sweep, common display sizes
+- 7: VRAM to GTT, buffer size sweep, common display sizes
+- 8: VRAM to VRAM, buffer size sweep, common display sizes
+
+amdgpu_test_ib
+--------------
+
+Read this file to run simple IB (Indirect Buffer) tests on all kernel managed
+rings. IBs are command buffers usually generated by userspace applications
+which are submitted to the kernel for execution on a particular GPU engine.
+This just runs the simple IB tests included in the kernel. These tests
+are engine specific and verify that IB submission works.
+
+amdgpu_discovery
+----------------
+
+Provides raw access to the IP discovery binary provided by the GPU. Read this
+file to access the raw binary. This is useful for verifying the contents of
+the IP discovery table. It is chip specific.
+
+amdgpu_vbios
+------------
+
+Provides raw access to the ROM binary image from the GPU. Read this file to
+access the raw binary. This is useful for verifying the contents of the
+video BIOS ROM. It is board specific.
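Because the entries above are plain debugfs files, they can be exercised with
ordinary file I/O. The minimal userspace sketch below is not part of the patch;
it assumes debugfs is mounted at /sys/kernel/debug, that DRI device 0 is the
amdgpu GPU, and that it runs as root, and the benchmark number is just an
example value::

  /* Illustrative only: start DMA benchmark test 1 and save the video BIOS
   * image exposed by amdgpu debugfs.  Paths assume debugfs is mounted at
   * /sys/kernel/debug and that DRI device 0 is an amdgpu GPU. */
  #include <fcntl.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>

  int main(void)
  {
      const char *bench = "/sys/kernel/debug/dri/0/amdgpu_benchmark";
      const char *vbios = "/sys/kernel/debug/dri/0/amdgpu_vbios";
      char buf[4096];
      ssize_t n;
      int fd;

      /* Writing a test number starts the benchmark; results go to dmesg. */
      fd = open(bench, O_WRONLY);
      if (fd < 0 || write(fd, "1", 1) != 1)
          perror("amdgpu_benchmark");
      if (fd >= 0)
          close(fd);

      /* The VBIOS file is a raw binary image; here we just save a copy. */
      fd = open(vbios, O_RDONLY);
      if (fd < 0) {
          perror("amdgpu_vbios");
          return EXIT_FAILURE;
      }
      FILE *out = fopen("vbios.rom", "wb");
      if (!out) {
          perror("vbios.rom");
          close(fd);
          return EXIT_FAILURE;
      }
      while ((n = read(fd, buf, sizeof(buf))) > 0)
          fwrite(buf, 1, n, out);
      fclose(out);
      close(fd);
      return EXIT_SUCCESS;
  }

The same can be done from a shell with ``echo 1 > amdgpu_benchmark`` and
``cat amdgpu_vbios > vbios.rom``.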
+amdgpu_evict_gtt
+----------------
+
+Evict all buffers from the GTT memory pool. Read this file to evict all
+buffers from this pool.
+
+amdgpu_evict_vram
+-----------------
+
+Evict all buffers from the VRAM memory pool. Read this file to evict all
+buffers from this pool.
+
+amdgpu_gpu_recover
+------------------
+
+Trigger a GPU reset. Read this file to trigger a reset of the entire GPU.
+All work currently running on the GPU will be lost.
+
+amdgpu_ring_<name>
+------------------
+
+Provides read access to the kernel managed ring buffers for each ring <name>.
+These are useful for debugging problems on a particular ring. The ring buffer
+is how the CPU sends commands to the GPU. The CPU writes commands into the
+buffer and then asks the GPU engine to process it. This is the raw binary
+contents of the ring buffer. Use a tool like UMR to decode the rings into human
+readable form.
+
+amdgpu_mqd_<name>
+-----------------
+
+Provides read access to the kernel managed MQD (Memory Queue Descriptor) for
+ring <name> managed by the kernel driver. MQDs define the features of the ring
+and are used to store the ring's state when it is not connected to hardware.
+The driver writes the requested ring features and metadata (GPU addresses of
+the ring itself and associated buffers) to the MQD and the firmware uses the MQD
+to populate the hardware when the ring is mapped to a hardware slot. Only
+available on engines which use MQDs. This provides access to the raw MQD
+binary.
+
+amdgpu_error_<name>
+-------------------
+
+Provides an interface to set an error code on the dma fences associated with
+ring <name>. The error code specified is propagated to all fences associated
+with the ring. Use this to inject a fence error into a ring.
+
+amdgpu_pm_info
+--------------
+
+Provides human readable information about the power management features
+and state of the GPU. This includes current GFX clock, Memory clock,
+voltages, average SoC power, temperature, GFX load, Memory load, SMU
+feature mask, VCN power state, clock and power gating features.
+
+amdgpu_firmware_info
+--------------------
+
+Lists the firmware versions for all firmwares used by the GPU. Only
+entries with a non-0 version are valid. If the version is 0, the firmware
+is not valid for the GPU.
+
+amdgpu_fence_info
+-----------------
+
+Shows the last signalled and emitted fence sequence numbers for each
+kernel driver managed ring. Fences are associated with submissions
+to the engine. Emitted fences have been submitted to the ring
+and signalled fences have been signalled by the GPU. Rings with a
+larger emitted fence value have outstanding work that is still being
+processed by the engine that owns that ring. When the emitted and
+signalled fence values are equal, the ring is idle.
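To make the emitted/signalled bookkeeping above concrete, here is a tiny
illustrative sketch; the structure and helper names are invented for this
example and are not part of the driver's API. A ring is idle exactly when
nothing emitted remains unsignalled::

  /* Hypothetical sketch of the fence bookkeeping described above: a ring is
   * idle when every emitted fence sequence number has also been signalled. */
  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  struct ring_fences {
      uint32_t emitted;   /* last sequence number handed to the ring   */
      uint32_t signalled; /* last sequence number completed by the GPU */
  };

  /* Sequence numbers wrap, so compare with unsigned subtraction. */
  static uint32_t outstanding(const struct ring_fences *r)
  {
      return r->emitted - r->signalled;
  }

  static bool ring_is_idle(const struct ring_fences *r)
  {
      return outstanding(r) == 0;
  }

  int main(void)
  {
      struct ring_fences gfx = { .emitted = 1024, .signalled = 1020 };

      printf("outstanding submissions: %u, idle: %s\n",
             outstanding(&gfx), ring_is_idle(&gfx) ? "yes" : "no");
      return 0;
  }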
+amdgpu_gem_info
+---------------
+
+Lists all of the PIDs using the GPU and the GPU buffers that they have
+allocated. This lists the buffer size, pool (VRAM, GTT, etc.), and buffer
+attributes (CPU access required, CPU cache attributes, etc.).
+
+amdgpu_vm_info
+--------------
+
+Lists all of the PIDs using the GPU and the GPU buffers that they have
+allocated as well as the status of those buffers relative to that process'
+GPU virtual address space (e.g., evicted, idle, invalidated, etc.).
+
+amdgpu_sa_info
+--------------
+
+Prints out all of the suballocations (sa) made by the suballocation manager in
+the kernel driver. Prints the GPU address, size, and fence info associated
+with each suballocation. The suballocations are used internally within
+the kernel driver for various things.
+
+amdgpu_<pool>_mm
+----------------
+
+Prints TTM information about the memory pool <pool>.
+
+amdgpu_vram
+-----------
+
+Provides direct access to VRAM. Used by tools like UMR to inspect
+objects in VRAM.
+
+amdgpu_iomem
+------------
+
+Provides direct access to GTT memory. Used by tools like UMR to inspect
+GTT memory.
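As a hedged illustration of how amdgpu_vram can be used directly (treating the
file offset as an offset into VRAM is an assumption about the interface, based
on how tools such as UMR typically use these files), a few bytes of VRAM could
be peeked at like this::

  /* Illustrative sketch: peek at a few bytes of VRAM through the amdgpu_vram
   * debugfs file.  It assumes the file offset is interpreted as an offset
   * into VRAM and that the program runs as root with debugfs mounted. */
  #include <fcntl.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <unistd.h>

  int main(void)
  {
      const char *path = "/sys/kernel/debug/dri/0/amdgpu_vram";
      uint8_t bytes[16];
      off_t offset = 0x100000; /* arbitrary VRAM offset for the example */
      int fd = open(path, O_RDONLY);

      if (fd < 0 ||
          pread(fd, bytes, sizeof(bytes), offset) != (ssize_t)sizeof(bytes)) {
          perror("amdgpu_vram");
          return 1;
      }
      for (unsigned i = 0; i < sizeof(bytes); i++)
          printf("%02x ", bytes[i]);
      printf("\n");
      close(fd);
      return 0;
  }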
+amdgpu_regs_*
+-------------
+
+Provides direct access to various register apertures on the GPU. Used
+by tools like UMR to access GPU registers.
+
+amdgpu_regs2
+------------
+
+Provides an IOCTL interface used by UMR for interacting with GPU registers.
+
+
+amdgpu_sensors
+--------------
+
+Provides an interface to query GPU power metrics (temperature, average
+power, etc.). Used by tools like UMR to query GPU power metrics.
+
+
+amdgpu_gca_config
+-----------------
+
+Provides an interface to query GPU details (Graphics/Compute Array config,
+PCI config, GPU family, etc.). Used by tools like UMR to query GPU details.
+
+amdgpu_wave
+-----------
+
+Used to query GFX/compute wave information from the hardware. Used by tools
+like UMR to query GFX/compute wave information.
+
+amdgpu_gpr
+----------
+
+Used to query GFX/compute GPR (General Purpose Register) information from the
+hardware. Used by tools like UMR to query GPRs when debugging shaders.
+
+amdgpu_gprwave
+--------------
+
+Provides an IOCTL interface used by UMR for interacting with shader waves.
+
+amdgpu_fw_attestation
+---------------------
+
+Provides an interface for reading back firmware attestation records.
diff --git a/Documentation/gpu/amdgpu/debugging.rst b/Documentation/gpu/amdgpu/debugging.rst
index e75f97d0e4ea..7cbfea0606e1 100644
--- a/Documentation/gpu/amdgpu/debugging.rst
+++ b/Documentation/gpu/amdgpu/debugging.rst
@@ -2,6 +2,13 @@
 GPU Debugging
 ===============
 
+General Debugging Options
+=========================
+
+The DebugFS section provides documentation on a number of files to aid in
+debugging issues on the GPU.
+
+
 GPUVM Debugging
 ===============
 
diff --git a/Documentation/gpu/amdgpu/display/dc-debug.rst b/Documentation/gpu/amdgpu/display/dc-debug.rst
index 013f63b271f3..605dca21f4ae 100644
--- a/Documentation/gpu/amdgpu/display/dc-debug.rst
+++ b/Documentation/gpu/amdgpu/display/dc-debug.rst
@@ -154,7 +154,7 @@ of the display parameters, but the userspace might also cause this issue. One
 way to identify the source of the problem is to take a screenshot or make a
 desktop video capture when the problem happens; after checking the
 screenshot/video recording, if you don't see any of the artifacts, it means
-that the issue is likely on the the driver side. If you can still see the
+that the issue is likely on the driver side. If you can still see the
 problem in the data collected, it is an issue that probably happened during
 rendering, and the display code just got the framebuffer already corrupted.
 
diff --git a/Documentation/gpu/amdgpu/driver-core.rst b/Documentation/gpu/amdgpu/driver-core.rst
index 32723a925377..81256318e93c 100644
--- a/Documentation/gpu/amdgpu/driver-core.rst
+++ b/Documentation/gpu/amdgpu/driver-core.rst
@@ -67,36 +67,66 @@ GC (Graphics and Compute)
     This is the graphics and compute engine, i.e., the block that
     encompasses the 3D pipeline and shader blocks. This is by far the
     largest block on the GPU. The 3D pipeline has tons of sub-blocks. In
-    addition to that, it also contains the CP microcontrollers (ME, PFP,
-    CE, MEC) and the RLC microcontroller. It's exposed to userspace for
-    user mode drivers (OpenGL, Vulkan, OpenCL, etc.)
+    addition to that, it also contains the CP microcontrollers (ME, PFP, CE,
+    MEC) and the RLC microcontroller. It's exposed to userspace for user mode
+    drivers (OpenGL, Vulkan, OpenCL, etc.). More details in :ref:`Graphics (GFX)
+    and Compute <amdgpu-gc>`.
 
 VCN (Video Core Next)
     This is the multi-media engine. It handles video and image encode and
     decode. It's exposed to userspace for user mode drivers (VA-API,
     OpenMAX, etc.)
 
-Graphics and Compute Microcontrollers
--------------------------------------
-
-CP (Command Processor)
-    The name for the hardware block that encompasses the front end of the
-    GFX/Compute pipeline. Consists mainly of a bunch of microcontrollers
-    (PFP, ME, CE, MEC). The firmware that runs on these microcontrollers
-    provides the driver interface to interact with the GFX/Compute engine.
-
-    MEC (MicroEngine Compute)
-        This is the microcontroller that controls the compute queues on the
-        GFX/compute engine.
-
-    MES (MicroEngine Scheduler)
-        This is a new engine for managing queues. This is currently unused.
-
-RLC (RunList Controller)
-    This is another microcontroller in the GFX/Compute engine. It handles
-    power management related functionality within the GFX/Compute engine.
-    The name is a vestige of old hardware where it was originally added
-    and doesn't really have much relation to what the engine does now.
+.. _pipes-and-queues-description:
+
+GFX, Compute, and SDMA Overall Behavior
+=======================================
+
+.. note:: For simplicity, whenever the term block is used in this section, it
+          means GFX, Compute, and SDMA.
+
+GFX, Compute and SDMA share a similar form of operation that can be abstracted
+to facilitate understanding of the behavior of these blocks. See the figure
+below illustrating the common components of these blocks:
+
+.. kernel-figure:: pipe_and_queue_abstraction.svg
+
+In the central part of this figure, you can see two hardware elements, one
+called **Pipes** and another called **Queues**; it is important to highlight
+that Queues must be associated with a Pipe and vice-versa. Every hardware IP
+may have a different number of Pipes and, in turn, a different number of
+Queues; for example, GFX 11 has two Pipes and two Queues per Pipe for the GFX
+front end.
+
+A Pipe is the hardware that processes the instructions available in the
+Queues; in other words, it is a thread executing the operations inserted in
+the Queue. One crucial characteristic of Pipes is that they can only execute
+one Queue at a time; no matter how many Queues are associated with a Pipe,
+only one of them runs at any given moment.
+
+Pipes can swap between Queues at the hardware level, but they only make use
+of Queues that are currently mapped. Pipes can switch between Queues based on
+any of the following inputs:
+
+1. Command Stream;
+2. Packet by Packet;
+3. Other hardware requests the change (e.g., MES).
+
+Queues within Pipes are defined by the Hardware Queue Descriptors (HQD).
+Associated with the HQD concept, we have the Memory Queue Descriptor (MQD),
+which is responsible for storing information about the state of each of the
+available Queues in memory. The state of a Queue contains information such
+as the GPU virtual address of the queue itself, save areas, doorbell, etc. The
+MQD also stores the HQD registers, which are vital for activating or
+deactivating a given Queue. The scheduling firmware (e.g., MES) is responsible
+for loading HQDs from MQDs and vice versa.
+
+The Queue-switching process can also happen with the firmware requesting the
+preemption or unmapping of a Queue. The firmware waits for the HQD_ACTIVE bit
+to change to low before saving the state into the MQD. To make a different
+Queue become active, the firmware copies the MQD state into the HQD registers
+and loads any additional state. Finally, it sets the HQD_ACTIVE bit to high to
+indicate that the queue is active. The Pipe will then execute work from active
+Queues.
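The MQD/HQD hand-off described above can be pictured with a short, purely
illustrative sketch; the struct layouts, field names, and the boolean standing
in for the HQD_ACTIVE bit are invented for this example and do not match the
real per-generation register definitions::

  /* Conceptual sketch of mapping a queue: copy its saved state (MQD) into
   * the hardware queue descriptor registers (HQD) and flip HQD_ACTIVE.
   * All names and fields are illustrative, not the real register layout. */
  #include <stdbool.h>
  #include <stdint.h>

  struct mqd {                 /* queue state kept in memory             */
      uint64_t ring_base;      /* GPU VA of the ring buffer              */
      uint64_t save_area;      /* GPU VA of the context save area        */
      uint32_t doorbell;       /* doorbell offset for this queue         */
      uint32_t rptr, wptr;     /* read/write pointers at preemption time */
  };

  struct hqd_regs {            /* hardware slot owned by one pipe        */
      uint64_t base;
      uint64_t save_area;
      uint32_t doorbell;
      uint32_t rptr, wptr;
      bool     active;         /* stands in for the HQD_ACTIVE bit       */
  };

  static void hqd_map(struct hqd_regs *hqd, const struct mqd *mqd)
  {
      hqd->base      = mqd->ring_base;
      hqd->save_area = mqd->save_area;
      hqd->doorbell  = mqd->doorbell;
      hqd->rptr      = mqd->rptr;
      hqd->wptr      = mqd->wptr;
      hqd->active    = true;    /* pipe may now fetch from this queue    */
  }

  static void hqd_unmap(struct hqd_regs *hqd, struct mqd *mqd)
  {
      hqd->active = false;      /* firmware waits for this to go low     */
      mqd->rptr   = hqd->rptr;  /* save current position back to memory  */
      mqd->wptr   = hqd->wptr;
  }

  int main(void)
  {
      struct mqd q = { .ring_base = 0x100000, .doorbell = 8 };
      struct hqd_regs slot = { 0 };

      hqd_map(&slot, &q);   /* queue becomes runnable on the pipe        */
      hqd_unmap(&slot, &q); /* state saved back, slot free for reuse     */
      return 0;
  }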
 
 Driver Structure
 ================
@@ -110,7 +140,8 @@ Some useful constructs:
 KIQ (Kernel Interface Queue)
     This is a control queue used by the kernel driver to manage other gfx
     and compute queues on the GFX/compute engine. You can use it to
-    map/unmap additional queues, etc.
+    map/unmap additional queues, etc. This is replaced by MES on
+    GFX 11 and newer hardware.
 
 IB (Indirect Buffer)
     A command buffer for a particular engine. Rather than writing
diff --git a/Documentation/gpu/amdgpu/driver-misc.rst b/Documentation/gpu/amdgpu/driver-misc.rst
index e40e15f89fd3..25b0c857816e 100644
--- a/Documentation/gpu/amdgpu/driver-misc.rst
+++ b/Documentation/gpu/amdgpu/driver-misc.rst
@@ -50,23 +50,6 @@ board_info
 .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
    :doc: board_info
 
-Accelerated Processing Units (APU) Info
----------------------------------------
-
-.. csv-table::
-   :header-rows: 1
-   :widths: 3, 2, 2, 1, 1, 1, 1
-   :file: ./apu-asic-info-table.csv
-
-Discrete GPU Info
------------------
-
-.. csv-table::
-   :header-rows: 1
-   :widths: 3, 2, 2, 1, 1, 1
-   :file: ./dgpu-asic-info-table.csv
-
-
 GPU Memory Usage Information
 ============================
 
diff --git a/Documentation/gpu/amdgpu/gc/index.rst b/Documentation/gpu/amdgpu/gc/index.rst
new file mode 100644
index 000000000000..ff6e9ef5cbee
--- /dev/null
+++ b/Documentation/gpu/amdgpu/gc/index.rst
@@ -0,0 +1,52 @@
+.. _amdgpu-gc:
+
+========================================
+ drm/amdgpu - Graphics and Compute (GC)
+========================================
+
+The relationship between the CPU and GPU can be described as the
+producer-consumer problem, where the CPU fills out a buffer with operations
+(producer) to be executed by the GPU (consumer). The requested operations in
+the buffer are called Command Packets, which can be summarized as a compressed
+way of transmitting command information to the graphics controller.
+
+The component that acts as the front end between the CPU and the GPU is called
+the Command Processor (CP). This component is responsible for providing greater
+flexibility to the GC since CP makes it possible to program various aspects of
+the GPU pipeline. CP also coordinates the communication between the CPU and GPU
+via a mechanism named **Ring Buffers**, where the CPU appends information to
+the buffer while the GPU removes operations. It is relevant to highlight that
+the CPU can add a pointer to the Ring Buffer that points to another region of
+memory outside the Ring Buffer, and CP can handle it; this mechanism is called
+**Indirect Buffer (IB)**. CP receives and parses the Command Streams (CS), and
+writes the operations to the correct hardware blocks.
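A toy producer/consumer model of the Ring Buffer and Indirect Buffer mechanism
just described; the packet encoding, opcode value, and helper names are made
up for illustration, as the real packet formats are defined by the hardware::

  /* Conceptual sketch of the ring buffer described above: the CPU (producer)
   * appends packets at the write pointer, the CP (consumer) parses them at
   * the read pointer.  An "indirect buffer" packet simply carries the address
   * and size of another buffer for the CP to fetch.  Illustrative only. */
  #include <stdint.h>
  #include <stdio.h>

  #define RING_ENTRIES 256 /* power of two so the index can simply wrap */

  struct ring {
      uint32_t pkt[RING_ENTRIES];
      uint32_t wptr; /* owned by the CPU */
      uint32_t rptr; /* owned by the CP  */
  };

  static void ring_emit(struct ring *r, uint32_t dw)
  {
      r->pkt[r->wptr++ & (RING_ENTRIES - 1)] = dw;
  }

  /* Hypothetical "chase this indirect buffer" packet. */
  static void ring_emit_ib(struct ring *r, uint64_t ib_gpu_addr,
                           uint32_t ib_dwords)
  {
      ring_emit(r, 0xC0010000);                       /* made-up IB opcode */
      ring_emit(r, (uint32_t)ib_gpu_addr);            /* address, low bits  */
      ring_emit(r, (uint32_t)(ib_gpu_addr >> 32));    /* address, high bits */
      ring_emit(r, ib_dwords);
  }

  static void cp_consume(struct ring *r)
  {
      while (r->rptr != r->wptr) {
          uint32_t dw = r->pkt[r->rptr++ & (RING_ENTRIES - 1)];
          printf("CP parsed dword 0x%08x\n", dw);
      }
  }

  int main(void)
  {
      struct ring gfx = { 0 };

      ring_emit_ib(&gfx, 0x800000000ULL, 64); /* point the CP at an IB */
      cp_consume(&gfx);                       /* "GPU" drains the ring */
      return 0;
  }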
+
+Graphics (GFX) and Compute Microcontrollers
+-------------------------------------------
+
+GC is a large block, and as a result, it has multiple firmware blobs
+associated with it. Some of them are:
+
+CP (Command Processor)
+    The name for the hardware block that encompasses the front end of the
+    GFX/Compute pipeline. Consists mainly of a bunch of microcontrollers
+    (PFP, ME, CE, MEC). The firmware that runs on these microcontrollers
+    provides the driver interface to interact with the GFX/Compute engine.
+
+    MEC (MicroEngine Compute)
+        This is the microcontroller that controls the compute queues on the
+        GFX/compute engine.
+
+    MES (MicroEngine Scheduler)
+        This is the engine for managing queues. For more details check
+        :ref:`MicroEngine Scheduler (MES) <amdgpu-mes>`.
+
+RLC (RunList Controller)
+    This is another microcontroller in the GFX/Compute engine. It handles
+    power management related functionality within the GFX/Compute engine.
+    The name is a vestige of old hardware where it was originally added
+    and doesn't really have much relation to what the engine does now.
+
+.. toctree::
+
+   mes.rst
diff --git a/Documentation/gpu/amdgpu/gc/mes.rst b/Documentation/gpu/amdgpu/gc/mes.rst
new file mode 100644
index 000000000000..b99eb211b179
--- /dev/null
+++ b/Documentation/gpu/amdgpu/gc/mes.rst
@@ -0,0 +1,38 @@
+.. _amdgpu-mes:
+
+=============================
+ MicroEngine Scheduler (MES)
+=============================
+
+.. note::
+   Queue and ring buffer are used synonymously here.
+
+.. note::
+   This section assumes that you are familiar with the concept of Pipes,
+   Queues, and GC. If not, check
+   :ref:`GFX, Compute, and SDMA Overall Behavior <pipes-and-queues-description>`
+   and :ref:`drm/amdgpu - Graphics and Compute (GC) <amdgpu-gc>`.
+
+Every GFX block has a pipe component with one or more hardware queues. Pipes
+can switch between queues depending on certain conditions, and one of the
+components that can request a queue switch to a pipe is the MicroEngine
+Scheduler (MES). Whenever the driver is initialized, it creates one MQD per
+hardware queue, and then the MQDs are handed to the MES firmware for mapping
+to:
+
+1. Kernel Queues (legacy): These queues are statically mapped to HQDs and
+   never preempted. Even though this is a legacy feature, it is the current
+   default, and most existing hardware supports it. When an application
+   submits work to the kernel driver, it submits all of the application
+   command buffers to the kernel queues. The CS IOCTL takes the command
+   buffers from the application and schedules them on a kernel queue.
+
+2. User Queues: These queues are dynamically mapped to the HQDs. The userspace
+   application creates its own user queues and submits work directly to them,
+   with no IOCTL for each submission and no need to share a single kernel
+   queue.
+
+For User Queues, MES can dynamically map them to HQDs. If there are more MQDs
+than HQDs, the MES firmware will preempt other user queues to make sure each
+queue gets a time slice; in other words, MES is a microcontroller that handles
+the mapping and unmapping of MQDs into HQDs, as well as the priorities and
+oversubscription of MQDs.
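The oversubscription idea can be illustrated with a toy round-robin model; the
fixed counts and the policy below are invented for the example, since the real
scheduling decisions (priorities, time-slice lengths, preemption) are made by
the MES firmware::

  /* Toy model of oversubscription: more user queues (MQDs) than hardware
   * slots (HQDs), so a scheduler round-robins the MQDs through the slots,
   * one time slice each.  Policy and names are invented for illustration. */
  #include <stdio.h>

  #define NUM_HQD 2  /* hardware slots available   */
  #define NUM_MQD 5  /* user queues wanting to run */

  int main(void)
  {
      int slice, slot;

      for (slice = 0; slice < 4; slice++) {
          printf("time slice %d:", slice);
          for (slot = 0; slot < NUM_HQD; slot++) {
              /* unmap whatever ran last slice, map the next MQD */
              int mqd = (slice * NUM_HQD + slot) % NUM_MQD;
              printf("  HQD[%d] <- MQD[%d]", slot, mqd);
          }
          printf("\n");
      }
      return 0;
  }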
diff --git a/Documentation/gpu/amdgpu/index.rst b/Documentation/gpu/amdgpu/index.rst
index 302d039928ee..bb2894b5edaf 100644
--- a/Documentation/gpu/amdgpu/index.rst
+++ b/Documentation/gpu/amdgpu/index.rst
@@ -7,8 +7,10 @@ Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) architectures.
 
 .. toctree::
 
-   module-parameters
    driver-core
+   amd-hardware-list-info
+   module-parameters
+   gc/index
    display/index
    flashing
    xgmi
@@ -16,5 +18,6 @@ Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) architectures.
    thermal
    driver-misc
    debugging
+   debugfs
    process-isolation
    amdgpu-glossary
diff --git a/Documentation/gpu/amdgpu/pipe_and_queue_abstraction.svg b/Documentation/gpu/amdgpu/pipe_and_queue_abstraction.svg
new file mode 100644
index 000000000000..0df3c6b3000b
--- /dev/null
+++ b/Documentation/gpu/amdgpu/pipe_and_queue_abstraction.svg
@@ -0,0 +1,1279 @@
[1,279 lines of Inkscape-generated SVG markup omitted. The image is the
pipe-and-queue abstraction figure referenced by driver-core.rst: a hardware
block containing Pipe[0] through Pipe[n], each holding Queue[0] through
Queue[n], HQDs selecting the active queue on each pipe, MQDs stored in memory,
and the queues feeding execution.]
style="fill:none;stroke:context-stroke;stroke-width:1;stroke-linecap:butt" + d="M 3,-3 0,0 3,3" + transform="rotate(180,0.125,0)" + sodipodi:nodetypes="ccc" + id="path1" /> + </marker> + <marker + style="overflow:visible" + id="Triangle" + refX="0" + refY="0" + orient="auto-start-reverse" + inkscape:stockid="Triangle arrow" + markerWidth="1" + markerHeight="1" + viewBox="0 0 1 1" + inkscape:isstock="true" + inkscape:collect="always" + preserveAspectRatio="xMidYMid"> + <path + transform="scale(0.5)" + style="fill:context-stroke;fill-rule:evenodd;stroke:context-stroke;stroke-width:1pt" + d="M 5.77,0 -2.88,5 V -5 Z" + id="path135" /> + </marker> + <marker + style="overflow:visible" + id="ArrowWideHeavy" + refX="0" + refY="0" + orient="auto-start-reverse" + inkscape:stockid="Wide, heavy arrow" + markerWidth="1" + markerHeight="1" + viewBox="0 0 1 1" + inkscape:isstock="true" + inkscape:collect="always" + preserveAspectRatio="xMidYMid"> + <path + style="fill:context-stroke;fill-rule:evenodd;stroke:none" + d="m 1,0 -3,3 h -2 l 3,-3 -3,-3 h 2 z" + id="path71" /> + </marker> + </defs> + <g + inkscape:label="Layer 1" + inkscape:groupmode="layer" + id="layer1" + transform="translate(149.03517,55.110629)"> + <circle + style="fill:#ffeeaa;fill-opacity:1;stroke:#1a1a1a;stroke-width:0.733436;stroke-dasharray:none;stroke-dashoffset:0" + id="path98" + cx="-35.757576" + cy="-10.495151" + r="44.24876" /> + <rect + style="fill:none;stroke:#000000;stroke-width:0.878057;stroke-dasharray:none" + id="rect1" + width="167.79619" + height="24.831829" + x="14.21942" + y="57.862854" /> + <g + id="g11" + transform="translate(24.021362,-46.545299)"> + <rect + style="fill:none;stroke:#006680;stroke-width:0.459999;stroke-dasharray:none" + id="rect2" + width="131.09708" + height="5.8163381" + x="23.245802" + y="107.16314" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 37.884707,107.26337 v 5.53565" + id="path2" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 52.466233,107.26337 v 5.53565" + id="path3" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 67.047758,107.26337 v 5.53565" + id="path4" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 81.629283,107.26337 v 5.53565" + id="path5" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 96.210809,107.26337 v 5.53565" + id="path6" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 110.79233,107.26337 v 5.53565" + id="path7" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 125.37386,107.26337 v 5.53565" + id="path8" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 139.95538,107.26337 v 5.53565" + id="path9" + sodipodi:nodetypes="cc" /> + <text + xml:space="preserve" + style="font-size:3.175px;text-align:start;writing-mode:lr-tb;direction:ltr;text-anchor:start;fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + x="86.713425" + y="110.1963" + id="text11"><tspan + sodipodi:role="line" + id="tspan11" + style="stroke:#006680;stroke-width:0.5" + x="86.713425" + y="110.1963">. . 
.</tspan></text> + </g> + <g + id="g18" + transform="translate(24.021362,-32.25779)"> + <rect + style="fill:none;stroke:#006680;stroke-width:0.459999;stroke-dasharray:none" + id="rect11" + width="131.09708" + height="5.8163381" + x="23.245802" + y="107.16314" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 37.884707,107.26337 v 5.53565" + id="path11" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 52.466233,107.26337 v 5.53565" + id="path12" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 67.047758,107.26337 v 5.53565" + id="path13" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 81.629283,107.26337 v 5.53565" + id="path14" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 96.210809,107.26337 v 5.53565" + id="path15" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 110.79233,107.26337 v 5.53565" + id="path16" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 125.37386,107.26337 v 5.53565" + id="path17" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 139.95538,107.26337 v 5.53565" + id="path18" + sodipodi:nodetypes="cc" /> + <text + xml:space="preserve" + style="font-size:3.175px;text-align:start;writing-mode:lr-tb;direction:ltr;text-anchor:start;fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + x="86.713425" + y="110.1963" + id="text18"><tspan + sodipodi:role="line" + id="tspan18" + style="stroke:#006680;stroke-width:0.5" + x="86.713425" + y="110.1963">. . .</tspan></text> + </g> + <text + xml:space="preserve" + style="font-size:3.175px;text-align:start;writing-mode:lr-tb;direction:ltr;text-anchor:start;fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + x="113.37768" + y="68.463142" + id="text19"><tspan + sodipodi:role="line" + id="tspan19" + style="writing-mode:tb-rl;stroke-width:0.5" + x="113.37768" + y="68.463142">. . 
.</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="25.005701" + y="55.308445" + id="text844-2-9"><tspan + sodipodi:role="line" + x="25.005701" + y="55.308445" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan2868">Pipe[0]</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="-74.441521" + y="63.075123" + id="text844-2-9-4"><tspan + sodipodi:role="line" + x="-74.441521" + y="63.075123" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan2868-7">MQD</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="30.264952" + y="65.490654" + id="text20"><tspan + sodipodi:role="line" + x="30.264952" + y="65.490654" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan20">Queue[0]</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="30.264952" + y="79.249001" + id="text21"><tspan + sodipodi:role="line" + x="30.264952" + y="79.249001" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan21">Queue[n]</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="30.264952" + y="71.84066" + id="text22"><tspan + sodipodi:role="line" + x="30.264952" + y="71.84066" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan22">...</tspan></text> + <g + id="g71" + transform="translate(-23.283342)"> + <rect + style="fill:#ffffff;fill-opacity:0;stroke:#000000;stroke-width:0.5;stroke-dasharray:0.5, 0.5;stroke-dashoffset:0" + id="rect67" + width="18.533583" + height="114.96632" + x="250.9435" + y="54.754276" + ry="6.0427966" /> + </g> + <rect + style="fill:#1a1a1a;fill-opacity:0;stroke:#000000;stroke-width:0.499999;stroke-dasharray:none;stroke-dashoffset:0" + id="rect68" + 
width="188.21231" + height="139.5948" + x="4.0113592" + y="37.597778" + ry="0" /> + <g + id="g43" + transform="translate(0,40.745853)"> + <rect + style="fill:none;stroke:#000000;stroke-width:0.878057;stroke-dasharray:none" + id="rect22" + width="167.79619" + height="24.831829" + x="14.21942" + y="57.862854" /> + <g + id="g30" + transform="translate(24.021362,-46.545299)"> + <rect + style="fill:none;stroke:#006680;stroke-width:0.459999;stroke-dasharray:none" + id="rect23" + width="131.09708" + height="5.8163381" + x="23.245802" + y="107.16314" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 37.884707,107.26337 v 5.53565" + id="path23" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 52.466233,107.26337 v 5.53565" + id="path24" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 67.047758,107.26337 v 5.53565" + id="path25" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 81.629283,107.26337 v 5.53565" + id="path26" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 96.210809,107.26337 v 5.53565" + id="path27" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 110.79233,107.26337 v 5.53565" + id="path28" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 125.37386,107.26337 v 5.53565" + id="path29" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 139.95538,107.26337 v 5.53565" + id="path30" + sodipodi:nodetypes="cc" /> + <text + xml:space="preserve" + style="font-size:3.175px;text-align:start;writing-mode:lr-tb;direction:ltr;text-anchor:start;fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + x="86.713425" + y="110.1963" + id="text30"><tspan + sodipodi:role="line" + id="tspan30" + style="stroke:#006680;stroke-width:0.5" + x="86.713425" + y="110.1963">. . 
.</tspan></text> + </g> + <g + id="g38" + transform="translate(24.021362,-32.25779)"> + <rect + style="fill:none;stroke:#006680;stroke-width:0.459999;stroke-dasharray:none" + id="rect30" + width="131.09708" + height="5.8163381" + x="23.245802" + y="107.16314" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 37.884707,107.26337 v 5.53565" + id="path31" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 52.466233,107.26337 v 5.53565" + id="path32" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 67.047758,107.26337 v 5.53565" + id="path33" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 81.629283,107.26337 v 5.53565" + id="path34" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 96.210809,107.26337 v 5.53565" + id="path35" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 110.79233,107.26337 v 5.53565" + id="path36" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 125.37386,107.26337 v 5.53565" + id="path37" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 139.95538,107.26337 v 5.53565" + id="path38" + sodipodi:nodetypes="cc" /> + <text + xml:space="preserve" + style="font-size:3.175px;text-align:start;writing-mode:lr-tb;direction:ltr;text-anchor:start;fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + x="86.713425" + y="110.1963" + id="text38"><tspan + sodipodi:role="line" + id="tspan38" + style="stroke:#006680;stroke-width:0.5" + x="86.713425" + y="110.1963">. . .</tspan></text> + </g> + <text + xml:space="preserve" + style="font-size:3.175px;text-align:start;writing-mode:lr-tb;direction:ltr;text-anchor:start;fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + x="113.37768" + y="68.463142" + id="text39"><tspan + sodipodi:role="line" + id="tspan39" + style="writing-mode:tb-rl;stroke-width:0.5" + x="113.37768" + y="68.463142">. . 
.</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="25.005701" + y="55.308445" + id="text40"><tspan + sodipodi:role="line" + x="25.005701" + y="55.308445" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan40">Pipe[1]</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="30.264952" + y="65.490654" + id="text41"><tspan + sodipodi:role="line" + x="30.264952" + y="65.490654" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan41">Queue[0]</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="30.264952" + y="79.249001" + id="text42"><tspan + sodipodi:role="line" + x="30.264952" + y="79.249001" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan42">Queue[n]</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="30.264952" + y="71.84066" + id="text43"><tspan + sodipodi:role="line" + x="30.264952" + y="71.84066" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan43">...</tspan></text> + </g> + <g + id="g64" + transform="translate(0,85.195881)"> + <rect + style="fill:none;stroke:#000000;stroke-width:0.878057;stroke-dasharray:none" + id="rect43" + width="167.79619" + height="24.831829" + x="14.21942" + y="57.862854" /> + <g + id="g51" + transform="translate(24.021362,-46.545299)"> + <rect + style="fill:none;stroke:#006680;stroke-width:0.459999;stroke-dasharray:none" + id="rect44" + width="131.09708" + height="5.8163381" + x="23.245802" + y="107.16314" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 37.884707,107.26337 v 5.53565" + id="path44" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 52.466233,107.26337 v 5.53565" + id="path45" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 67.047758,107.26337 v 5.53565" + id="path46" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + 
d="m 81.629283,107.26337 v 5.53565" + id="path47" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 96.210809,107.26337 v 5.53565" + id="path48" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 110.79233,107.26337 v 5.53565" + id="path49" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 125.37386,107.26337 v 5.53565" + id="path50" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 139.95538,107.26337 v 5.53565" + id="path51" + sodipodi:nodetypes="cc" /> + <text + xml:space="preserve" + style="font-size:3.175px;text-align:start;writing-mode:lr-tb;direction:ltr;text-anchor:start;fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + x="86.713425" + y="110.1963" + id="text51"><tspan + sodipodi:role="line" + id="tspan51" + style="stroke:#006680;stroke-width:0.5" + x="86.713425" + y="110.1963">. . .</tspan></text> + </g> + <g + id="g59" + transform="translate(24.021362,-32.25779)"> + <rect + style="fill:none;stroke:#006680;stroke-width:0.459999;stroke-dasharray:none" + id="rect51" + width="131.09708" + height="5.8163381" + x="23.245802" + y="107.16314" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 37.884707,107.26337 v 5.53565" + id="path52" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 52.466233,107.26337 v 5.53565" + id="path53" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 67.047758,107.26337 v 5.53565" + id="path54" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 81.629283,107.26337 v 5.53565" + id="path55" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 96.210809,107.26337 v 5.53565" + id="path56" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 110.79233,107.26337 v 5.53565" + id="path57" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 125.37386,107.26337 v 5.53565" + id="path58" + sodipodi:nodetypes="cc" /> + <path + style="fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + d="m 139.95538,107.26337 v 5.53565" + id="path59" + sodipodi:nodetypes="cc" /> + <text + xml:space="preserve" + style="font-size:3.175px;text-align:start;writing-mode:lr-tb;direction:ltr;text-anchor:start;fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + x="86.713425" + y="110.1963" + id="text59"><tspan + sodipodi:role="line" + id="tspan59" + style="stroke:#006680;stroke-width:0.5" + x="86.713425" + y="110.1963">. . .</tspan></text> + </g> + <text + xml:space="preserve" + style="font-size:3.175px;text-align:start;writing-mode:lr-tb;direction:ltr;text-anchor:start;fill:none;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + x="113.37768" + y="68.463142" + id="text60"><tspan + sodipodi:role="line" + id="tspan60" + style="writing-mode:tb-rl;stroke-width:0.5" + x="113.37768" + y="68.463142">. . 
.</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="25.005701" + y="55.308445" + id="text61"><tspan + sodipodi:role="line" + x="25.005701" + y="55.308445" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan61">Pipe[n]</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="30.264952" + y="65.490654" + id="text62"><tspan + sodipodi:role="line" + x="30.264952" + y="65.490654" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan62">Queue[0]</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="30.264952" + y="79.249001" + id="text63"><tspan + sodipodi:role="line" + x="30.264952" + y="79.249001" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan63">Queue[n]</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="30.264952" + y="71.84066" + id="text64"><tspan + sodipodi:role="line" + x="30.264952" + y="71.84066" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan64">...</tspan></text> + </g> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:7.76111px;font-family:Serif;-inkscape-font-specification:Serif;text-align:start;writing-mode:lr-tb;direction:ltr;text-anchor:start;fill:#000000;fill-opacity:0;stroke:#006680;stroke-width:0.499999;stroke-dasharray:none" + x="92.18071" + y="128.21965" + id="text65"><tspan + sodipodi:role="line" + id="tspan65" + style="font-size:7.76111px;writing-mode:tb-rl;fill:#1a1a1a;stroke:#000000;stroke-width:0.5" + x="92.18071" + y="128.21965">...</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:7.7611px;font-family:Serif;-inkscape-font-specification:Serif;text-align:start;writing-mode:tb-rl;direction:ltr;text-anchor:start;fill:#1a1a1a;fill-opacity:0;stroke:#000000;stroke-width:0.499999;stroke-dasharray:none" + x="198.72205" + y="80.708267" + id="text66"><tspan + sodipodi:role="line" + id="tspan66" + style="stroke-width:0.5" + x="198.72205" + 
y="80.708267" /></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="97.905846" + y="44.725101" + id="text68"><tspan + sodipodi:role="line" + x="97.905846" + y="44.725101" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan68">Hardware Block</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;writing-mode:vertical-lr;text-orientation:upright;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="236.36934" + y="112.10503" + id="text68-7"><tspan + sodipodi:role="line" + x="236.36934" + y="112.10503" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;writing-mode:vertical-lr;text-orientation:upright;text-anchor:middle;stroke-width:0.0690111" + id="tspan68-4">EXECUTION</tspan></text> + <rect + style="fill:#1a1a1a;fill-opacity:0;stroke:#000000;stroke-width:0.940575;stroke-dasharray:7.5246, 0.940575;stroke-dashoffset:0" + id="rect68-1" + width="68.749969" + height="141.2751" + x="-129.49162" + y="37.881134" + ry="0" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="-114.4223" + y="44.966106" + id="text68-2"><tspan + sodipodi:role="line" + x="-114.4223" + y="44.966106" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan68-9">Memory</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:12.7px;font-family:Serif;-inkscape-font-specification:Serif;text-align:start;writing-mode:tb-rl;direction:ltr;text-orientation:upright;text-anchor:start;fill:#1a1a1a;fill-opacity:0;stroke:#000000;stroke-width:0.499999;stroke-dasharray:none;stroke-dashoffset:0" + x="212.6013" + y="64.823341" + id="text69"><tspan + sodipodi:role="line" + id="tspan69" + style="font-size:12.7px;stroke-width:0.5" + x="212.6013" + y="64.823341" /></text> + <g + id="g72"> + <path + style="font-weight:bold;font-size:16.9333px;line-height:1.25;-inkscape-font-specification:'sans-serif Bold';text-align:center;letter-spacing:0px;word-spacing:0px;text-anchor:middle;stroke-width:0.0690111" + d="m 185.24734,83.96512 h 1.28693 c 1.20226,0.01693 2.09973,-1.015998 2.0828,-2.370662 v -9.393759 c -0.0169,-0.711199 0.33866,-1.066798 1.10066,-1.066798 h 0.2032 0.0677 v -1.557864 c -1.0668,0.01693 -1.38853,-0.270933 -1.37159,-1.236131 V 58.96308 c 0.0339,-1.354664 -0.88054,-2.387595 -2.0828,-2.370662 h -1.28693 v 1.676397 h 0.62653 c 0.57573,0 0.77893,0.321733 0.762,1.100664 v 9.122827 c 0,1.219198 0.44027,1.710263 1.64253,1.862663 -1.20226,0.135466 -1.64253,0.626532 -1.64253,1.84573 v 9.122826 c 0.0169,0.660399 
-0.23707,0.965198 -0.762,0.965198 h -0.62653 z" + id="text70" + aria-label="}" + sodipodi:nodetypes="ccccsccccccccscscscscc" /> + <path + style="fill:#1a1a1a;fill-opacity:0;stroke:#000000;stroke-width:1.25;stroke-dasharray:none;stroke-dashoffset:0;marker-end:url(#ArrowWideHeavy)" + d="M 190.40199,70.278769 H 224.6663" + id="path70" /> + </g> + <path + style="font-weight:bold;font-size:16.9333px;line-height:1.25;-inkscape-font-specification:'sans-serif Bold';text-align:center;letter-spacing:0px;word-spacing:0px;text-anchor:middle;stroke-width:0.0690111" + d="m -88.44114,74.337891 h 1.28693 c 1.20226,0.01693 2.09973,-1.015998 2.0828,-2.370662 V 62.57347 c -0.0169,-0.711199 0.33866,-1.066798 1.10066,-1.066798 h 0.2032 0.0677 v -1.557864 c -1.0668,0.01693 -1.38853,-0.270933 -1.37159,-1.236131 v -9.376826 c 0.0339,-1.354664 -0.88054,-2.387595 -2.0828,-2.370662 h -1.28693 v 1.676397 h 0.62653 c 0.57573,0 0.77893,0.321733 0.762,1.100664 v 9.122827 c 0,1.219198 0.44027,1.710263 1.64253,1.862663 -1.20226,0.135466 -1.64253,0.626532 -1.64253,1.84573 v 9.122826 c 0.0169,0.660399 -0.23707,0.965198 -0.762,0.965198 h -0.62653 z" + id="text70-1" + aria-label="}" + sodipodi:nodetypes="ccccsccccccccscscscscc" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:16.9333px;font-family:Serif;-inkscape-font-specification:Serif;text-align:start;writing-mode:tb-rl;direction:ltr;text-orientation:upright;text-anchor:start;fill:#1a1a1a;fill-opacity:0;stroke:#000000;stroke-width:1.25;stroke-dasharray:none;stroke-dashoffset:0" + x="200.88817" + y="38.990276" + id="text72"><tspan + sodipodi:role="line" + id="tspan72" + style="stroke-width:1.25" + x="200.88817" + y="38.990276" /></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:6.35px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="205.12828" + y="60.090775" + id="text73"><tspan + sodipodi:role="line" + x="205.12828" + y="60.090775" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:6.35px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan73">e.g.,:</tspan><tspan + sodipodi:role="line" + x="205.12828" + y="68.028275" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:6.35px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan74">queue[0]</tspan></text> + <g + id="g75" + transform="translate(0,40.745853)"> + <path + style="font-weight:bold;font-size:16.9333px;line-height:1.25;-inkscape-font-specification:'sans-serif Bold';text-align:center;letter-spacing:0px;word-spacing:0px;text-anchor:middle;stroke-width:0.0690111" + d="m 185.24734,83.96512 h 1.28693 c 1.20226,0.01693 2.09973,-1.015998 2.0828,-2.370662 v -9.393759 c -0.0169,-0.711199 0.33866,-1.066798 1.10066,-1.066798 h 0.2032 0.0677 v -1.557864 c -1.0668,0.01693 -1.38853,-0.270933 -1.37159,-1.236131 V 58.96308 c 0.0339,-1.354664 -0.88054,-2.387595 -2.0828,-2.370662 h -1.28693 v 1.676397 h 0.62653 c 0.57573,0 0.77893,0.321733 0.762,1.100664 v 9.122827 c 0,1.219198 0.44027,1.710263 1.64253,1.862663 -1.20226,0.135466 -1.64253,0.626532 -1.64253,1.84573 v 9.122826 c 0.0169,0.660399 -0.23707,0.965198 -0.762,0.965198 h -0.62653 z" + id="path74" + 
aria-label="}" + sodipodi:nodetypes="ccccsccccccccscscscscc" /> + <path + style="fill:#1a1a1a;fill-opacity:0;stroke:#000000;stroke-width:1.25;stroke-dasharray:none;stroke-dashoffset:0;marker-end:url(#ArrowWideHeavy)" + d="M 190.40199,70.278769 H 224.6663" + id="path75" /> + </g> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:6.35px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="205.12828" + y="100.83664" + id="text76"><tspan + sodipodi:role="line" + x="205.12828" + y="100.83664" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:6.35px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan75">e.g.,:</tspan><tspan + sodipodi:role="line" + x="205.12828" + y="108.77414" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:6.35px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan76">queue[4]</tspan></text> + <g + id="g77" + transform="translate(0,85.725048)"> + <path + style="font-weight:bold;font-size:16.9333px;line-height:1.25;-inkscape-font-specification:'sans-serif Bold';text-align:center;letter-spacing:0px;word-spacing:0px;text-anchor:middle;stroke-width:0.0690111" + d="m 185.24734,83.96512 h 1.28693 c 1.20226,0.01693 2.09973,-1.015998 2.0828,-2.370662 v -9.393759 c -0.0169,-0.711199 0.33866,-1.066798 1.10066,-1.066798 h 0.2032 0.0677 v -1.557864 c -1.0668,0.01693 -1.38853,-0.270933 -1.37159,-1.236131 V 58.96308 c 0.0339,-1.354664 -0.88054,-2.387595 -2.0828,-2.370662 h -1.28693 v 1.676397 h 0.62653 c 0.57573,0 0.77893,0.321733 0.762,1.100664 v 9.122827 c 0,1.219198 0.44027,1.710263 1.64253,1.862663 -1.20226,0.135466 -1.64253,0.626532 -1.64253,1.84573 v 9.122826 c 0.0169,0.660399 -0.23707,0.965198 -0.762,0.965198 h -0.62653 z" + id="path76" + aria-label="}" + sodipodi:nodetypes="ccccsccccccccscscscscc" /> + <path + style="fill:#1a1a1a;fill-opacity:0;stroke:#000000;stroke-width:1.25;stroke-dasharray:none;stroke-dashoffset:0;marker-end:url(#ArrowWideHeavy)" + d="M 190.40199,70.278769 H 224.6663" + id="path77" /> + </g> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:6.35px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="205.12828" + y="145.81558" + id="text78"><tspan + sodipodi:role="line" + x="205.12828" + y="145.81558" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:6.35px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan77">e.g.,:</tspan><tspan + sodipodi:role="line" + x="205.12828" + y="153.75308" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:6.35px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan78">queue[n]</tspan></text> + <g + id="g81"> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + 
x="-18.502264" + y="65.642387" + id="text79"><tspan + sodipodi:role="line" + x="-18.502264" + y="65.642387" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;fill:#800000;stroke-width:0.0690111" + id="tspan79">HQD</tspan></text> + <ellipse + style="fill:#800000;fill-opacity:0;stroke:#800000;stroke-width:1;stroke-dasharray:none;stroke-dashoffset:0" + id="path79" + cx="-18.407015" + cy="63.2188" + rx="13.317666" + ry="4.4124799" /> + <path + style="fill:#800000;fill-opacity:0;stroke:#800000;stroke-width:1;stroke-dasharray:none;stroke-dashoffset:0;marker-end:url(#Triangle)" + d="M -4.6361224,63.2188 H 10.606988" + id="path80" /> + </g> + <g + id="g82" + transform="translate(0,14.287503)"> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="-18.502264" + y="65.642387" + id="text81"><tspan + sodipodi:role="line" + x="-18.502264" + y="65.642387" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;fill:#800000;stroke-width:0.0690111" + id="tspan81">HQD</tspan></text> + <ellipse + style="fill:#800000;fill-opacity:0;stroke:#800000;stroke-width:1;stroke-dasharray:none;stroke-dashoffset:0" + id="ellipse81" + cx="-18.407015" + cy="63.2188" + rx="13.317666" + ry="4.4124799" /> + <path + style="fill:#800000;fill-opacity:0;stroke:#800000;stroke-width:1;stroke-dasharray:none;stroke-dashoffset:0;marker-end:url(#Triangle)" + d="M -4.6361224,63.2188 H 10.606988" + id="path81" /> + </g> + <g + id="g83" + transform="translate(0,40.745853)"> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="-18.502264" + y="65.642387" + id="text82"><tspan + sodipodi:role="line" + x="-18.502264" + y="65.642387" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;fill:#800000;stroke-width:0.0690111" + id="tspan82">HQD</tspan></text> + <ellipse + style="fill:#800000;fill-opacity:0;stroke:#800000;stroke-width:1;stroke-dasharray:none;stroke-dashoffset:0" + id="ellipse82" + cx="-18.407015" + cy="63.2188" + rx="13.317666" + ry="4.4124799" /> + <path + style="fill:#800000;fill-opacity:0;stroke:#800000;stroke-width:1;stroke-dasharray:none;stroke-dashoffset:0;marker-end:url(#Triangle)" + d="M -4.6361224,63.2188 H 10.606988" + id="path82" /> + </g> + <g + id="g84" + transform="translate(0,55.033362)"> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="-18.502264" + y="65.642387" + id="text83"><tspan + sodipodi:role="line" + x="-18.502264" + y="65.642387" + 
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;fill:#800000;stroke-width:0.0690111" + id="tspan83">HQD</tspan></text> + <ellipse + style="fill:#800000;fill-opacity:0;stroke:#800000;stroke-width:1;stroke-dasharray:none;stroke-dashoffset:0" + id="ellipse83" + cx="-18.407015" + cy="63.2188" + rx="13.317666" + ry="4.4124799" /> + <path + style="fill:#800000;fill-opacity:0;stroke:#800000;stroke-width:1;stroke-dasharray:none;stroke-dashoffset:0;marker-end:url(#Triangle)" + d="M -4.6361224,63.2188 H 10.606988" + id="path83" /> + </g> + <g + id="g85" + transform="translate(0,85.195881)"> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="-18.502264" + y="65.642387" + id="text84"><tspan + sodipodi:role="line" + x="-18.502264" + y="65.642387" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;fill:#800000;stroke-width:0.0690111" + id="tspan84">HQD</tspan></text> + <ellipse + style="fill:#800000;fill-opacity:0;stroke:#800000;stroke-width:1;stroke-dasharray:none;stroke-dashoffset:0" + id="ellipse84" + cx="-18.407015" + cy="63.2188" + rx="13.317666" + ry="4.4124799" /> + <path + style="fill:#800000;fill-opacity:0;stroke:#800000;stroke-width:1;stroke-dasharray:none;stroke-dashoffset:0;marker-end:url(#Triangle)" + d="M -4.6361224,63.2188 H 10.606988" + id="path84" /> + </g> + <g + id="g86" + transform="translate(0,99.48339)"> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="-18.502264" + y="65.642387" + id="text85"><tspan + sodipodi:role="line" + x="-18.502264" + y="65.642387" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;fill:#800000;stroke-width:0.0690111" + id="tspan85">HQD</tspan></text> + <ellipse + style="fill:#800000;fill-opacity:0;stroke:#800000;stroke-width:1;stroke-dasharray:none;stroke-dashoffset:0" + id="ellipse85" + cx="-18.407015" + cy="63.2188" + rx="13.317666" + ry="4.4124799" /> + <path + style="fill:#800000;fill-opacity:0;stroke:#800000;stroke-width:1;stroke-dasharray:none;stroke-dashoffset:0;marker-end:url(#Triangle)" + d="M -4.6361224,63.2188 H 10.606988" + id="path85" /> + </g> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="-35.2131" + y="54.673237" + id="text86"><tspan + sodipodi:role="line" + x="-35.2131" + y="54.673237" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.64444px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan86">Registers</tspan></text> + <path + 
style="fill:#800000;fill-opacity:0;stroke:#1a1a1a;stroke-width:1;stroke-dasharray:1, 1;stroke-dashoffset:0;marker-end:url(#ArrowWide)" + d="m -45.247972,57.442462 v 5.888987 h 11.344412" + id="path86" + sodipodi:nodetypes="ccc" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="-74.441521" + y="82.441582" + id="text87"><tspan + sodipodi:role="line" + x="-74.441521" + y="82.441582" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan87">MQD</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="-74.441521" + y="99.240776" + id="text88"><tspan + sodipodi:role="line" + x="-74.441521" + y="99.240776" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan88">MQD</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="-74.441521" + y="116.03998" + id="text89"><tspan + sodipodi:role="line" + x="-74.441521" + y="116.03998" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan89">MQD</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="-74.441521" + y="132.83917" + id="text90"><tspan + sodipodi:role="line" + x="-74.441521" + y="132.83917" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan90">MQD</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:7.05556px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="-74.441521" + y="168.3002" + id="text91"><tspan + sodipodi:role="line" + x="-74.441521" + y="168.3002" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:7.05556px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan91">MQD</tspan></text> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + 
x="-74.441521" + y="148.1461" + id="text92"><tspan + sodipodi:role="line" + x="-74.441521" + y="148.1461" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:10.5833px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;writing-mode:tb-rl;text-anchor:middle;stroke-width:0.0690111" + id="tspan92">...</tspan></text> + <g + id="g97" + transform="translate(-5.8208336)"> + <rect + style="fill:#aaffcc;fill-opacity:1;stroke:#1a1a1a;stroke-width:0.5;stroke-dasharray:none;stroke-dashoffset:0" + id="rect96" + width="58.726093" + height="27.598055" + x="-142.96434" + y="46.852512" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:4.23333px;line-height:1.25;font-family:sans-serif;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="-139.74622" + y="52.62756" + id="text93"><tspan + sodipodi:role="line" + x="-139.74622" + y="52.62756" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:4.23333px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:start;text-anchor:start;stroke-width:0.0690111" + id="tspan93">HQD Registers</tspan><tspan + sodipodi:role="line" + x="-139.74622" + y="57.91922" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:4.23333px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:start;text-anchor:start;stroke-width:0.0690111" + id="tspan95">Queue Address in the GPU</tspan><tspan + sodipodi:role="line" + x="-139.74622" + y="63.210884" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:4.23333px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:start;text-anchor:start;stroke-width:0.0690111" + id="tspan96">Doorbell</tspan><tspan + sodipodi:role="line" + x="-139.74622" + y="68.502548" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:4.23333px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:start;text-anchor:start;stroke-width:0.0690111" + id="tspan97">...</tspan><tspan + sodipodi:role="line" + x="-139.74622" + y="73.794212" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:4.23333px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:start;text-anchor:start;stroke-width:0.0690111" + id="tspan94" /></text> + </g> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:4.23333px;line-height:1.25;font-family:sans-serif;text-align:start;letter-spacing:0px;word-spacing:0px;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="-61.293022" + y="-19.380915" + id="text93-1"><tspan + sodipodi:role="line" + x="-61.293022" + y="-19.380915" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:4.23333px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan104">SWITCH QUEUE:</tspan><tspan + sodipodi:role="line" + x="-61.293022" + y="-14.089252" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:4.23333px;font-family:sans-serif;-inkscape-font-specification:'sans-serif 
Bold';text-align:start;text-anchor:start;stroke-width:0.0690111" + id="tspan101">WAIT FOR HQD_ACTIVE = 0</tspan><tspan + sodipodi:role="line" + x="-61.293022" + y="-8.7975903" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:4.23333px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:start;text-anchor:start;stroke-width:0.0690111" + id="tspan105">SAVE QUEUE STATE TO THE MQD</tspan><tspan + sodipodi:role="line" + x="-61.293022" + y="-3.505928" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:4.23333px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:start;text-anchor:start;stroke-width:0.0690111" + id="tspan102">COPY NEW MQD STATE</tspan><tspan + sodipodi:role="line" + x="-61.293022" + y="1.7857342" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:4.23333px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:start;text-anchor:start;stroke-width:0.0690111" + id="tspan103">SET HQD_ACTIVE = 1</tspan></text> + <circle + style="fill:#ffeeaa;fill-opacity:1;stroke:#1a1a1a;stroke-width:1.88976;stroke-dasharray:none;stroke-dashoffset:0" + id="path97" + cx="0" + cy="0" + r="0" + transform="matrix(0.26458333,0,0,0.26458333,-149.03517,37.347779)" /> + <text + xml:space="preserve" + style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.0690111" + x="-36.659206" + y="-44.828983" + id="text106"><tspan + sodipodi:role="line" + x="-36.659206" + y="-44.828983" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.64444px;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.0690111" + id="tspan106">Firmware</tspan></text> + <path + style="fill:none;fill-opacity:1;stroke:#1a1a1a;stroke-width:1;stroke-dasharray:none;stroke-dashoffset:0;marker-start:url(#marker109);marker-end:url(#ArrowTriangleStylized)" + d="M -84.242601,-9.1838245 H -98.041629 V 33.17598" + id="path106" /> + <path + style="fill:none;fill-opacity:1;stroke:#1a1a1a;stroke-width:1;stroke-dasharray:none;stroke-dashoffset:0;marker-end:url(#Dot)" + d="M 9.9745536,-9.3442784 H 29.549918 V 37.170287" + id="path107" + sodipodi:nodetypes="ccc" /> + </g> +</svg> diff --git a/Documentation/gpu/xe/xe_firmware.rst b/Documentation/gpu/xe/xe_firmware.rst index afcb561cd37d..5d23e9f27391 100644 --- a/Documentation/gpu/xe/xe_firmware.rst +++ b/Documentation/gpu/xe/xe_firmware.rst @@ -31,6 +31,12 @@ GuC Power Conservation (PC) .. kernel-doc:: drivers/gpu/drm/xe/xe_guc_pc.c :doc: GuC Power Conservation (PC) +PCIe Gen5 Limitations +===================== + +.. 
kernel-doc:: drivers/gpu/drm/xe/xe_device_sysfs.c + :doc: PCIe Gen5 Limitations + Internal API ============ diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c index 2a059ac0ed27..a495d8a6c2e3 100644 --- a/drivers/dma-buf/dma-fence-unwrap.c +++ b/drivers/dma-buf/dma-fence-unwrap.c @@ -79,6 +79,41 @@ static int fence_cmp(const void *_a, const void *_b) return 0; } +/** + * dma_fence_dedup_array - Sort and deduplicate an array of dma_fence pointers + * @fences: Array of dma_fence pointers to be deduplicated + * @num_fences: Number of entries in the @fences array + * + * Sorts the input array by context, then removes duplicate + * fences with the same context, keeping only the most recent one. + * + * The array is modified in-place and unreferenced duplicate fences are released + * via dma_fence_put(). The function returns the new number of fences after + * deduplication. + * + * Return: Number of unique fences remaining in the array. + */ +int dma_fence_dedup_array(struct dma_fence **fences, int num_fences) +{ + int i, j; + + sort(fences, num_fences, sizeof(*fences), fence_cmp, NULL); + + /* + * Only keep the most recent fence for each context. + */ + j = 0; + for (i = 1; i < num_fences; i++) { + if (fences[i]->context == fences[j]->context) + dma_fence_put(fences[i]); + else + fences[++j] = fences[i]; + } + + return ++j; +} +EXPORT_SYMBOL_GPL(dma_fence_dedup_array); + /* Implementation for the dma_fence_merge() marco, don't use directly */ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, struct dma_fence **fences, @@ -87,7 +122,7 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, struct dma_fence *tmp, *unsignaled = NULL, **array; struct dma_fence_array *result; ktime_t timestamp; - int i, j, count; + int i, count; count = 0; timestamp = ns_to_ktime(0); @@ -141,19 +176,7 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, if (count == 0 || count == 1) goto return_fastpath; - sort(array, count, sizeof(*array), fence_cmp, NULL); - - /* - * Only keep the most recent fence for each context. 
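As a usage illustration only (not part of the patch): a minimal sketch of how a caller might use the new dma_fence_dedup_array() helper, much as __dma_fence_unwrap_merge() now does. The merge_fences() wrapper name is hypothetical, and the header that carries the dedup prototype is not shown in this hunk, so its include is assumed; dma_fence_array_create() and dma_fence_context_alloc() are existing interfaces:

	#include <linux/dma-fence.h>
	#include <linux/dma-fence-array.h>
	/* plus whichever header ends up declaring dma_fence_dedup_array() */

	/* Assumes count >= 1 and that the caller holds a reference on each fence. */
	static struct dma_fence *merge_fences(struct dma_fence **fences, int count)
	{
		struct dma_fence_array *array;

		/*
		 * Sort by context and keep only the newest fence per context;
		 * the dropped duplicates are released with dma_fence_put()
		 * inside the helper.
		 */
		count = dma_fence_dedup_array(fences, count);
		if (count == 1)
			return fences[0];

		array = dma_fence_array_create(count, fences,
					       dma_fence_context_alloc(1), 1, false);
		return array ? &array->base : NULL;
	}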
- */ - j = 0; - for (i = 1; i < count; i++) { - if (array[i]->context == array[j]->context) - dma_fence_put(array[i]); - else - array[++j] = array[i]; - } - count = ++j; + count = dma_fence_dedup_array(array, count); if (count > 1) { result = dma_fence_array_create(count, array, diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index aacc810cabb3..87080c06e5fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -66,7 +66,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \ - amdgpu_cper.o + amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o @@ -174,7 +174,10 @@ amdgpu-y += \ amdgpu-y += \ amdgpu_mes.o \ mes_v11_0.o \ - mes_v12_0.o + mes_v12_0.o \ + +# add GFX userqueue support +amdgpu-y += mes_userqueue.o # add UVD block amdgpu-y += \ @@ -253,6 +256,8 @@ amdgpu-y += \ # add amdkfd interfaces amdgpu-y += amdgpu_amdkfd.o +# add gfx usermode queue +amdgpu-y += amdgpu_userq.o ifneq ($(CONFIG_HSA_AMD),) AMDKFD_PATH := ../amdkfd diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ef6e78224fdf..836ea081088a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -113,6 +113,8 @@ #include "amdgpu_xcp.h" #include "amdgpu_seq64.h" #include "amdgpu_reg_state.h" +#include "amdgpu_userq.h" +#include "amdgpu_eviction_fence.h" #if defined(CONFIG_DRM_AMD_ISP) #include "amdgpu_isp.h" #endif @@ -228,7 +230,7 @@ extern int amdgpu_force_asic_type; extern int amdgpu_smartshift_bias; extern int amdgpu_use_xgmi_p2p; extern int amdgpu_mtype_local; -extern bool enforce_isolation; +extern int amdgpu_enforce_isolation; #ifdef CONFIG_HSA_AMD extern int sched_policy; extern bool debug_evictions; @@ -266,8 +268,10 @@ extern int amdgpu_umsch_mm_fwlog; extern int amdgpu_user_partt_mode; extern int amdgpu_agp; +extern int amdgpu_rebar; extern int amdgpu_wbrf; +extern int amdgpu_user_queue; #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) @@ -488,7 +492,6 @@ struct amdgpu_flip_work { bool async; }; - /* * file private structure */ @@ -501,6 +504,11 @@ struct amdgpu_fpriv { struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; + struct amdgpu_userq_mgr userq_mgr; + + /* Eviction fence infra */ + struct amdgpu_eviction_fence_mgr evf_mgr; + /** GPU partition selection */ uint32_t xcp_id; }; @@ -512,12 +520,62 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); */ #define AMDGPU_MAX_WB 1024 /* Reserve at most 1024 WB slots for amdgpu-owned rings. */ +/** + * amdgpu_wb - This struct is used for small GPU memory allocation. + * + * This struct is used to allocate a small amount of GPU memory that can be + * used to shadow certain states into the memory. This is especially useful for + * providing easy CPU access to some states without requiring register access + * (e.g., if some block is power gated, reading register may be problematic). + * + * Note: the term writeback was initially used because many of the amdgpu + * components had some level of writeback memory, and this struct initially + * described those components. + */ struct amdgpu_wb { + + /** + * @wb_obj: + * + * Buffer Object used for the writeback memory. 
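A minimal sketch, illustrative only, of the slot addressing documented for &struct amdgpu_wb: amdgpu_device_wb_get()/amdgpu_device_wb_free() are the existing slot allocators, and the rest is an assumed caller showing how the CPU and GPU views of the same dword are derived:

	u32 offs;
	u64 wb_gpu_addr;

	if (!amdgpu_device_wb_get(adev, &offs)) {
		/* GPU side: base address plus the byte offset of the slot */
		wb_gpu_addr = adev->wb.gpu_addr + (u64)offs * 4;
		/* CPU side: the same slot, indexed directly in the mapped array */
		adev->wb.wb[offs] = 0;
		/* ... point a ring/fence at wb_gpu_addr, poll adev->wb.wb[offs] ... */
		amdgpu_device_wb_free(adev, offs);
	}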
+ */ struct amdgpu_bo *wb_obj; + + /** + * @wb: + * + * Pointer to the first writeback slot. In terms of CPU address + * this value can be accessed directly by using the offset as an index. + * For the GPU address, it is necessary to use gpu_addr and the offset. + */ volatile uint32_t *wb; + + /** + * @gpu_addr: + * + * Writeback base address in the GPU. + */ uint64_t gpu_addr; - u32 num_wb; /* Number of wb slots actually reserved for amdgpu. */ + + /** + * @num_wb: + * + * Number of writeback slots reserved for amdgpu. + */ + u32 num_wb; + + /** + * @used: + * + * Track the writeback slot already used. + */ unsigned long used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)]; + + /** + * @lock: + * + * Protects read and write of the used field array. + */ spinlock_t lock; }; @@ -551,6 +609,7 @@ struct amdgpu_allowed_register_entry { * are reset depends on the ASIC. Notably doesn't reset IPs * shared with the CPU on APUs or the memory controllers (so * VRAM is not lost). Not available on all ASICs. + * @AMD_RESET_LINK: Triggers SW-UP link reset on other GPUs * @AMD_RESET_BACO: BACO (Bus Alive, Chip Off) method powers off and on the card * but without powering off the PCI bus. Suitable only for * discrete GPUs. @@ -568,6 +627,7 @@ enum amd_reset_method { AMD_RESET_METHOD_MODE0, AMD_RESET_METHOD_MODE1, AMD_RESET_METHOD_MODE2, + AMD_RESET_METHOD_LINK, AMD_RESET_METHOD_BACO, AMD_RESET_METHOD_PCI, AMD_RESET_METHOD_ON_INIT, @@ -821,6 +881,11 @@ struct amdgpu_mqd_prop { uint32_t hqd_queue_priority; bool allow_tunneling; bool hqd_active; + uint64_t shadow_addr; + uint64_t gds_bkup_addr; + uint64_t csa_addr; + uint64_t fence_address; + bool tmz_queue; }; struct amdgpu_mqd { @@ -829,6 +894,12 @@ struct amdgpu_mqd { struct amdgpu_mqd_prop *p); }; +struct amdgpu_pcie_reset_ctx { + bool in_link_reset; + bool occurs_dpc; + bool audio_suspended; +}; + /* * Custom Init levels could be defined for different situations where a full * initialization of all hardware blocks are not expected. Sample cases are @@ -853,6 +924,14 @@ struct amdgpu_init_level { struct amdgpu_reset_domain; struct amdgpu_fru_info; +enum amdgpu_enforce_isolation_mode { + AMDGPU_ENFORCE_ISOLATION_DISABLE = 0, + AMDGPU_ENFORCE_ISOLATION_ENABLE = 1, + AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY = 2, + AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER = 3, +}; + + /* * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise. */ @@ -1081,6 +1160,13 @@ struct amdgpu_device { bool enable_uni_mes; struct amdgpu_mes mes; struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM]; + const struct amdgpu_userq_funcs *userq_funcs[AMDGPU_HW_IP_NUM]; + + /* xarray used to retrieve the user queue fence driver reference + * in the EOP interrupt handler to signal the particular user + * queue fence. + */ + struct xarray userq_xa; /* df */ struct amdgpu_df df; @@ -1160,6 +1246,8 @@ struct amdgpu_device { struct pci_saved_state *pci_state; pci_channel_state_t pci_channel_state; + struct amdgpu_pcie_reset_ctx pcie_reset_ctx; + /* Track auto wait count on s_barrier settings */ bool barrier_has_auto_waitcnt; @@ -1193,10 +1281,11 @@ struct amdgpu_device { bool debug_enable_ras_aca; bool debug_exp_resets; bool debug_disable_gpu_ring_reset; + bool debug_vm_userptr; /* Protection for the following isolation structure */ struct mutex enforce_isolation_mutex; - bool enforce_isolation[MAX_XCP]; + enum amdgpu_enforce_isolation_mode enforce_isolation[MAX_XCP]; struct amdgpu_isolation { void *owner; struct dma_fence *spearhead; @@ -1210,6 +1299,10 @@ struct amdgpu_device { * in KFD: VRAM or GTT. 
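Purely as an illustration of the userq_xa usage described above (the lookup key, function name, and the void pointer for the fence driver are assumptions, not defined by this patch):

	/*
	 * Hypothetical lookup: assumes the user queue fence driver was stored
	 * under the queue's doorbell index when the queue was mapped.
	 * xa_load() is RCU-safe, so it may run from the EOP interrupt handler;
	 * XA_FLAGS_LOCK_IRQ keeps the store/erase side safe against that
	 * handler as well.
	 */
	static void *example_userq_fence_drv_lookup(struct amdgpu_device *adev,
						    unsigned long doorbell_index)
	{
		return xa_load(&adev->userq_xa, doorbell_index);
	}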
*/ bool apu_prefer_gtt; + + struct list_head userq_mgr_list; + struct mutex userq_mutex; + bool userq_halt_for_enforce_isolation; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, @@ -1464,6 +1557,7 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, const u32 array_size); int amdgpu_device_mode1_reset(struct amdgpu_device *adev); +int amdgpu_device_link_reset(struct amdgpu_device *adev); bool amdgpu_device_supports_atpx(struct drm_device *dev); bool amdgpu_device_supports_px(struct drm_device *dev); bool amdgpu_device_supports_boco(struct drm_device *dev); @@ -1614,11 +1708,9 @@ static inline void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_cap #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev); bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); -void amdgpu_choose_low_power_state(struct amdgpu_device *adev); #else static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; } static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; } -static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { } #endif void amdgpu_register_gpu_instance(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index b4ad163f42a7..3835f2592914 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -120,6 +120,9 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, st for (i = 0; i < ARRAY_SIZE(aca_regs); i++) RAS_EVENT_LOG(adev, event_id, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n", idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]); + + if (ACA_REG__STATUS__SCRUB(bank->regs[ACA_REG_IDX_STATUS])) + RAS_EVENT_LOG(adev, event_id, HW_ERR "hardware error logged by the scrubber\n"); } static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index b7f8f2ff143d..707e131f89d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1533,22 +1533,4 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) #endif /* CONFIG_AMD_PMC */ } -/** - * amdgpu_choose_low_power_state - * - * @adev: amdgpu_device_pointer - * - * Choose the target low power state for the GPU - */ -void amdgpu_choose_low_power_state(struct amdgpu_device *adev) -{ - if (adev->in_runpm) - return; - - if (amdgpu_acpi_is_s0ix_active(adev)) - adev->in_s0ix = true; - else if (amdgpu_acpi_is_s3_active(adev)) - adev->in_s3 = true; -} - #endif /* CONFIG_SUSPEND */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d2ec4130a316..260165bbe373 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2559,6 +2559,18 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, if (ret != -EFAULT) return ret; + /* If applications unmap memory before destroying the userptr + * from the KFD, trigger a segmentation fault in VM debug mode. 
+ */ + if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) { + pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n", + pid_nr(process_info->pid), mem->va); + + // Send GPU VM fault to user space + kfd_signal_vm_fault_event_with_userptr(kfd_lookup_process_by_pid(process_info->pid), + mem->va); + } + ret = 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 68bce6a6d09d..004a6a9d6b9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -252,83 +252,22 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, if (!adev->pm.fw) { switch (adev->asic_type) { - case CHIP_TAHITI: - strcpy(fw_name, "radeon/tahiti_smc.bin"); - break; - case CHIP_PITCAIRN: - if ((adev->pdev->revision == 0x81) && - ((adev->pdev->device == 0x6810) || - (adev->pdev->device == 0x6811))) { - info->is_kicker = true; - strcpy(fw_name, "radeon/pitcairn_k_smc.bin"); - } else { - strcpy(fw_name, "radeon/pitcairn_smc.bin"); - } - break; - case CHIP_VERDE: - if (((adev->pdev->device == 0x6820) && - ((adev->pdev->revision == 0x81) || - (adev->pdev->revision == 0x83))) || - ((adev->pdev->device == 0x6821) && - ((adev->pdev->revision == 0x83) || - (adev->pdev->revision == 0x87))) || - ((adev->pdev->revision == 0x87) && - ((adev->pdev->device == 0x6823) || - (adev->pdev->device == 0x682b)))) { - info->is_kicker = true; - strcpy(fw_name, "radeon/verde_k_smc.bin"); - } else { - strcpy(fw_name, "radeon/verde_smc.bin"); - } - break; - case CHIP_OLAND: - if (((adev->pdev->revision == 0x81) && - ((adev->pdev->device == 0x6600) || - (adev->pdev->device == 0x6604) || - (adev->pdev->device == 0x6605) || - (adev->pdev->device == 0x6610))) || - ((adev->pdev->revision == 0x83) && - (adev->pdev->device == 0x6610))) { - info->is_kicker = true; - strcpy(fw_name, "radeon/oland_k_smc.bin"); - } else { - strcpy(fw_name, "radeon/oland_smc.bin"); - } - break; - case CHIP_HAINAN: - if (((adev->pdev->revision == 0x81) && - (adev->pdev->device == 0x6660)) || - ((adev->pdev->revision == 0x83) && - ((adev->pdev->device == 0x6660) || - (adev->pdev->device == 0x6663) || - (adev->pdev->device == 0x6665) || - (adev->pdev->device == 0x6667)))) { - info->is_kicker = true; - strcpy(fw_name, "radeon/hainan_k_smc.bin"); - } else if ((adev->pdev->revision == 0xc3) && - (adev->pdev->device == 0x6665)) { - info->is_kicker = true; - strcpy(fw_name, "radeon/banks_k_2_smc.bin"); - } else { - strcpy(fw_name, "radeon/hainan_smc.bin"); - } - break; case CHIP_BONAIRE: if ((adev->pdev->revision == 0x80) || (adev->pdev->revision == 0x81) || (adev->pdev->device == 0x665f)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/bonaire_k_smc.bin"); + strscpy(fw_name, "amdgpu/bonaire_k_smc.bin"); } else { - strcpy(fw_name, "amdgpu/bonaire_smc.bin"); + strscpy(fw_name, "amdgpu/bonaire_smc.bin"); } break; case CHIP_HAWAII: if (adev->pdev->revision == 0x80) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/hawaii_k_smc.bin"); + strscpy(fw_name, "amdgpu/hawaii_k_smc.bin"); } else { - strcpy(fw_name, "amdgpu/hawaii_smc.bin"); + strscpy(fw_name, "amdgpu/hawaii_smc.bin"); } break; case CHIP_TOPAZ: @@ -338,76 +277,76 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD1)) || ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD3))) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/topaz_k_smc.bin"); + strscpy(fw_name, "amdgpu/topaz_k_smc.bin"); } else 
- strcpy(fw_name, "amdgpu/topaz_smc.bin"); + strscpy(fw_name, "amdgpu/topaz_smc.bin"); break; case CHIP_TONGA: if (((adev->pdev->device == 0x6939) && (adev->pdev->revision == 0xf1)) || ((adev->pdev->device == 0x6938) && (adev->pdev->revision == 0xf1))) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/tonga_k_smc.bin"); + strscpy(fw_name, "amdgpu/tonga_k_smc.bin"); } else - strcpy(fw_name, "amdgpu/tonga_smc.bin"); + strscpy(fw_name, "amdgpu/tonga_smc.bin"); break; case CHIP_FIJI: - strcpy(fw_name, "amdgpu/fiji_smc.bin"); + strscpy(fw_name, "amdgpu/fiji_smc.bin"); break; case CHIP_POLARIS11: if (type == CGS_UCODE_ID_SMU) { if (ASICID_IS_P21(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris11_k_smc.bin"); + strscpy(fw_name, "amdgpu/polaris11_k_smc.bin"); } else if (ASICID_IS_P31(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris11_k2_smc.bin"); + strscpy(fw_name, "amdgpu/polaris11_k2_smc.bin"); } else { - strcpy(fw_name, "amdgpu/polaris11_smc.bin"); + strscpy(fw_name, "amdgpu/polaris11_smc.bin"); } } else if (type == CGS_UCODE_ID_SMU_SK) { - strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin"); + strscpy(fw_name, "amdgpu/polaris11_smc_sk.bin"); } break; case CHIP_POLARIS10: if (type == CGS_UCODE_ID_SMU) { if (ASICID_IS_P20(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris10_k_smc.bin"); + strscpy(fw_name, "amdgpu/polaris10_k_smc.bin"); } else if (ASICID_IS_P30(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris10_k2_smc.bin"); + strscpy(fw_name, "amdgpu/polaris10_k2_smc.bin"); } else { - strcpy(fw_name, "amdgpu/polaris10_smc.bin"); + strscpy(fw_name, "amdgpu/polaris10_smc.bin"); } } else if (type == CGS_UCODE_ID_SMU_SK) { - strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin"); + strscpy(fw_name, "amdgpu/polaris10_smc_sk.bin"); } break; case CHIP_POLARIS12: if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris12_k_smc.bin"); + strscpy(fw_name, "amdgpu/polaris12_k_smc.bin"); } else { - strcpy(fw_name, "amdgpu/polaris12_smc.bin"); + strscpy(fw_name, "amdgpu/polaris12_smc.bin"); } break; case CHIP_VEGAM: - strcpy(fw_name, "amdgpu/vegam_smc.bin"); + strscpy(fw_name, "amdgpu/vegam_smc.bin"); break; case CHIP_VEGA10: if ((adev->pdev->device == 0x687f) && ((adev->pdev->revision == 0xc0) || (adev->pdev->revision == 0xc1) || (adev->pdev->revision == 0xc3))) - strcpy(fw_name, "amdgpu/vega10_acg_smc.bin"); + strscpy(fw_name, "amdgpu/vega10_acg_smc.bin"); else - strcpy(fw_name, "amdgpu/vega10_smc.bin"); + strscpy(fw_name, "amdgpu/vega10_smc.bin"); break; case CHIP_VEGA12: - strcpy(fw_name, "amdgpu/vega12_smc.bin"); + strscpy(fw_name, "amdgpu/vega12_smc.bin"); break; case CHIP_VEGA20: - strcpy(fw_name, "amdgpu/vega20_smc.bin"); + strscpy(fw_name, "amdgpu/vega20_smc.bin"); break; default: DRM_ERROR("SMC firmware not supported\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c index 360e07a5c7c1..5a234eadae8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c @@ -549,7 +549,7 @@ int amdgpu_cper_init(struct amdgpu_device *adev) { int r; - if (!amdgpu_aca_is_enabled(adev)) + if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev)) return 0; r = amdgpu_cper_ring_init(adev); @@ -568,7 +568,7 @@ int amdgpu_cper_init(struct amdgpu_device 
*adev) int amdgpu_cper_fini(struct amdgpu_device *adev) { - if (!amdgpu_aca_is_enabled(adev)) + if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev)) return 0; adev->cper.enabled = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 82df06a72ee0..9ea0d9b71f48 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -296,7 +296,25 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, num_ibs[i], &p->jobs[i]); if (ret) goto free_all_kdata; - p->jobs[i]->enforce_isolation = p->adev->enforce_isolation[fpriv->xcp_id]; + switch (p->adev->enforce_isolation[fpriv->xcp_id]) { + case AMDGPU_ENFORCE_ISOLATION_DISABLE: + default: + p->jobs[i]->enforce_isolation = false; + p->jobs[i]->run_cleaner_shader = false; + break; + case AMDGPU_ENFORCE_ISOLATION_ENABLE: + p->jobs[i]->enforce_isolation = true; + p->jobs[i]->run_cleaner_shader = true; + break; + case AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY: + p->jobs[i]->enforce_isolation = true; + p->jobs[i]->run_cleaner_shader = false; + break; + case AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER: + p->jobs[i]->enforce_isolation = true; + p->jobs[i]->run_cleaner_shader = false; + break; + } } p->gang_leader = p->jobs[p->gang_leader_idx]; @@ -349,6 +367,10 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, ring = amdgpu_job_ring(job); ib = &job->ibs[job->num_ibs++]; + /* submissions to kernel queues are disabled */ + if (ring->no_user_submission) + return -EINVAL; + /* MM engine doesn't support user fences */ if (p->uf_bo && ring->funcs->no_user_fence) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7f354cd532dc..4d1b54f58495 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -85,6 +85,7 @@ #if IS_ENABLED(CONFIG_X86) #include <asm/intel-family.h> +#include <asm/cpu_device_id.h> #endif MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); @@ -1680,6 +1681,9 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; + if (!amdgpu_rebar) + return 0; + /* resizing on Dell G5 SE platforms causes problems with runtime pm */ if ((amdgpu_runtime_pm != 0) && adev->pdev->vendor == PCI_VENDOR_ID_ATI && @@ -1870,6 +1874,35 @@ static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device return true; } +static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev) +{ +#if IS_ENABLED(CONFIG_X86) + struct cpuinfo_x86 *c = &cpu_data(0); + + if (!(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) + return false; + + if (c->x86 == 6 && + adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) { + switch (c->x86_model) { + case VFM_MODEL(INTEL_ALDERLAKE): + case VFM_MODEL(INTEL_ALDERLAKE_L): + case VFM_MODEL(INTEL_RAPTORLAKE): + case VFM_MODEL(INTEL_RAPTORLAKE_P): + case VFM_MODEL(INTEL_RAPTORLAKE_S): + return true; + default: + return false; + } + } else { + return false; + } +#else + return false; +#endif +} + /** * amdgpu_device_should_use_aspm - check if the device should program ASPM * @@ -1894,7 +1927,7 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev) } if (adev->flags & AMD_IS_APU) return false; - if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK)) + if (amdgpu_device_aspm_support_quirk(adev)) return false; return pcie_aspm_enabled(adev->pdev); } @@ -2112,8 +2145,31 @@ static 
int amdgpu_device_check_arguments(struct amdgpu_device *adev) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); - for (i = 0; i < MAX_XCP; i++) - adev->enforce_isolation[i] = !!enforce_isolation; + for (i = 0; i < MAX_XCP; i++) { + switch (amdgpu_enforce_isolation) { + case -1: + case 0: + default: + /* disable */ + adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE; + break; + case 1: + /* enable */ + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE; + break; + case 2: + /* enable legacy mode */ + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY; + break; + case 3: + /* enable only process isolation without submitting cleaner shader */ + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER; + break; + } + } return 0; } @@ -2689,6 +2745,13 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) break; } + /* Check for IP version 9.4.3 with A0 hardware */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) && + !amdgpu_device_get_rev_id(adev)) { + dev_err(adev->dev, "Unsupported A0 hardware\n"); + return -ENODEV; /* device unsupported - no device error */ + } + if (amdgpu_has_atpx() && (amdgpu_is_atpx_hybrid() || amdgpu_has_atpx_dgpu_power_cntl()) && @@ -2701,7 +2764,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->has_pr3 = parent ? pci_pr3_present(parent) : false; } - adev->pm.pp_feature = amdgpu_pp_feature_mask; if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; @@ -3172,6 +3234,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) * always assumed to be lost. */ switch (amdgpu_asic_reset_method(adev)) { + case AMD_RESET_METHOD_LINK: case AMD_RESET_METHOD_BACO: case AMD_RESET_METHOD_MODE1: return true; @@ -3455,6 +3518,7 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); amdgpu_amdkfd_suspend(adev, false); + amdgpu_userq_suspend(adev); /* Workaround for ASICs need to disable SMC first */ amdgpu_device_smu_fini_early(adev); @@ -4307,9 +4371,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_sync_create(&adev->isolation[i].active); amdgpu_sync_create(&adev->isolation[i].prev); } - mutex_init(&adev->gfx.kfd_sch_mutex); + mutex_init(&adev->gfx.userq_sch_mutex); mutex_init(&adev->gfx.workload_profile_mutex); mutex_init(&adev->vcn.workload_profile_mutex); + mutex_init(&adev->userq_mutex); amdgpu_device_init_apu_flags(adev); @@ -4329,12 +4394,16 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->virt.rlcg_reg_lock); spin_lock_init(&adev->wb.lock); + xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ); + INIT_LIST_HEAD(&adev->reset_list); INIT_LIST_HEAD(&adev->ras_list); INIT_LIST_HEAD(&adev->pm.od_kobj_list); + INIT_LIST_HEAD(&adev->userq_mgr_list); + INIT_DELAYED_WORK(&adev->delayed_init_work, amdgpu_device_delayed_init_work_handler); INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, @@ -4907,28 +4976,20 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) * @data: data * * This function is called when the system is about to suspend or hibernate. - * It is used to evict resources from the device before the system goes to - * sleep while there is still access to swap. + * It is used to set the appropriate flags so that eviction can be optimized + * in the pm prepare callback. 
*/ static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, void *data) { struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb); - int r; switch (mode) { case PM_HIBERNATION_PREPARE: adev->in_s4 = true; - fallthrough; - case PM_SUSPEND_PREPARE: - r = amdgpu_device_evict_resources(adev); - /* - * This is considered non-fatal at this time because - * amdgpu_device_prepare() will also fatally evict resources. - * See https://gitlab.freedesktop.org/drm/amd/-/issues/3781 - */ - if (r) - drm_warn(adev_to_drm(adev), "Failed to evict resources, freeze active processes if problems occur: %d\n", r); + break; + case PM_POST_HIBERNATION: + adev->in_s4 = false; break; } @@ -4949,15 +5010,13 @@ int amdgpu_device_prepare(struct drm_device *dev) struct amdgpu_device *adev = drm_to_adev(dev); int i, r; - amdgpu_choose_low_power_state(adev); - if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; /* Evict the majority of BOs before starting suspend sequence */ r = amdgpu_device_evict_resources(adev); if (r) - goto unprepare; + return r; flush_delayed_work(&adev->gfx.gfx_off_delay_work); @@ -4968,15 +5027,10 @@ int amdgpu_device_prepare(struct drm_device *dev) continue; r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]); if (r) - goto unprepare; + return r; } return 0; - -unprepare: - adev->in_s0ix = adev->in_s3 = adev->in_s4 = false; - - return r; } /** @@ -5018,8 +5072,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) amdgpu_device_ip_suspend_phase1(adev); - if (!adev->in_s0ix) + if (!adev->in_s0ix) { amdgpu_amdkfd_suspend(adev, adev->in_runpm); + amdgpu_userq_suspend(adev); + } r = amdgpu_device_evict_resources(adev); if (r) @@ -5086,6 +5142,10 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients) r = amdgpu_amdkfd_resume(adev, adev->in_runpm); if (r) goto exit; + + r = amdgpu_userq_resume(adev); + if (r) + goto exit; } r = amdgpu_device_ip_late_init(adev); @@ -5134,9 +5194,6 @@ exit: } adev->in_suspend = false; - if (adev->enable_mes) - amdgpu_mes_self_test(adev); - if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0)) DRM_WARN("smart shift update failed\n"); @@ -5517,6 +5574,29 @@ mode1_reset_failed: return ret; } +int amdgpu_device_link_reset(struct amdgpu_device *adev) +{ + int ret = 0; + + dev_info(adev->dev, "GPU link reset\n"); + + if (!adev->pcie_reset_ctx.occurs_dpc) + ret = amdgpu_dpm_link_reset(adev); + + if (ret) + goto link_reset_failed; + + ret = amdgpu_psp_wait_for_bootloader(adev); + if (ret) + goto link_reset_failed; + + return 0; + +link_reset_failed: + dev_err(adev->dev, "GPU link reset failed\n"); + return ret; +} + int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context) { @@ -5821,6 +5901,7 @@ static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev) switch (amdgpu_asic_reset_method(adev)) { case AMD_RESET_METHOD_MODE1: + case AMD_RESET_METHOD_LINK: adev->mp1_state = PP_MP1_STATE_SHUTDOWN; break; case AMD_RESET_METHOD_MODE2: @@ -5937,94 +6018,42 @@ static int amdgpu_device_health_check(struct list_head *device_list_handle) return ret; } -/** - * amdgpu_device_gpu_recover - reset the asic and recover scheduler - * - * @adev: amdgpu_device pointer - * @job: which job trigger hang - * @reset_context: amdgpu reset context pointer - * - * Attempt to reset the GPU if it has hung (all asics). 
- * Attempt to do soft-reset or full-reset and reinitialize Asic - * Returns 0 for success or an error on failure. - */ - -int amdgpu_device_gpu_recover(struct amdgpu_device *adev, +static int amdgpu_device_halt_activities(struct amdgpu_device *adev, struct amdgpu_job *job, - struct amdgpu_reset_context *reset_context) + struct amdgpu_reset_context *reset_context, + struct list_head *device_list, + struct amdgpu_hive_info *hive, + bool need_emergency_restart) { - struct list_head device_list, *device_list_handle = NULL; - bool job_signaled = false; - struct amdgpu_hive_info *hive = NULL; + struct list_head *device_list_handle = NULL; struct amdgpu_device *tmp_adev = NULL; int i, r = 0; - bool need_emergency_restart = false; - bool audio_suspended = false; - int retry_limit = AMDGPU_MAX_RETRY_LIMIT; /* - * If it reaches here because of hang/timeout and a RAS error is - * detected at the same time, let RAS recovery take care of it. - */ - if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) && - !amdgpu_sriov_vf(adev) && - reset_context->src != AMDGPU_RESET_SRC_RAS) { - dev_dbg(adev->dev, - "Gpu recovery from source: %d yielding to RAS error recovery handling", - reset_context->src); - return 0; - } - /* - * Special case: RAS triggered and full reset isn't supported - */ - need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); - - /* - * Flush RAM to disk so that after reboot - * the user can read log and see why the system rebooted. - */ - if (need_emergency_restart && amdgpu_ras_get_context(adev) && - amdgpu_ras_get_context(adev)->reboot) { - DRM_WARN("Emergency reboot."); - - ksys_sync_helper(); - emergency_restart(); - } - - dev_info(adev->dev, "GPU %s begin!\n", - need_emergency_restart ? "jobs stop":"reset"); - - if (!amdgpu_sriov_vf(adev)) - hive = amdgpu_get_xgmi_hive(adev); - if (hive) - mutex_lock(&hive->hive_lock); - - reset_context->job = job; - reset_context->hive = hive; - /* * Build list of devices to reset. * In case we are in XGMI hive mode, resort the device list * to put adev in the 1st position. */ - INIT_LIST_HEAD(&device_list); if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) { list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - list_add_tail(&tmp_adev->reset_list, &device_list); + list_add_tail(&tmp_adev->reset_list, device_list); if (adev->shutdown) tmp_adev->shutdown = true; + if (adev->pcie_reset_ctx.occurs_dpc) + tmp_adev->pcie_reset_ctx.in_link_reset = true; } - if (!list_is_first(&adev->reset_list, &device_list)) - list_rotate_to_front(&adev->reset_list, &device_list); - device_list_handle = &device_list; + if (!list_is_first(&adev->reset_list, device_list)) + list_rotate_to_front(&adev->reset_list, device_list); + device_list_handle = device_list; } else { - list_add_tail(&adev->reset_list, &device_list); - device_list_handle = &device_list; + list_add_tail(&adev->reset_list, device_list); + device_list_handle = device_list; } - if (!amdgpu_sriov_vf(adev)) { + if (!amdgpu_sriov_vf(adev) && (!adev->pcie_reset_ctx.occurs_dpc)) { r = amdgpu_device_health_check(device_list_handle); if (r) - goto end_reset; + return r; } /* We need to lock reset domain only once both for XGMI and single device */ @@ -6048,7 +6077,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * some audio codec errors. 
*/ if (!amdgpu_device_suspend_display_audio(tmp_adev)) - audio_suspended = true; + tmp_adev->pcie_reset_ctx.audio_suspended = true; amdgpu_ras_set_error_query_ready(tmp_adev, false); @@ -6066,6 +6095,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* disable ras on ALL IPs */ if (!need_emergency_restart && + (!adev->pcie_reset_ctx.occurs_dpc) && amdgpu_device_ip_need_full_reset(tmp_adev)) amdgpu_ras_suspend(tmp_adev); @@ -6083,24 +6113,24 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, atomic_inc(&tmp_adev->gpu_reset_counter); } - if (need_emergency_restart) - goto skip_sched_resume; + return r; +} - /* - * Must check guilty signal here since after this point all old - * HW fences are force signaled. - * - * job->base holds a reference to parent fence - */ - if (job && dma_fence_is_signaled(&job->hw_fence)) { - job_signaled = true; - dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); - goto skip_hw_reset; - } +static int amdgpu_device_asic_reset(struct amdgpu_device *adev, + struct list_head *device_list, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_device *tmp_adev = NULL; + int retry_limit = AMDGPU_MAX_RETRY_LIMIT; + int r = 0; retry: /* Rest of adevs pre asic reset from XGMI hive. */ - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + list_for_each_entry(tmp_adev, device_list, reset_list) { + if (adev->pcie_reset_ctx.occurs_dpc) + tmp_adev->no_hw_access = true; r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context); + if (adev->pcie_reset_ctx.occurs_dpc) + tmp_adev->no_hw_access = false; /*TODO Should we stop ?*/ if (r) { dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ", @@ -6112,6 +6142,11 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ /* Actual ASIC resets if needed.*/ /* Host driver will handle XGMI hive reset for SRIOV */ if (amdgpu_sriov_vf(adev)) { + + /* Bail out of reset early */ + if (amdgpu_ras_is_rma(adev)) + return -ENODEV; + if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) { dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n"); amdgpu_ras_set_fed(adev, true); @@ -6126,12 +6161,12 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ if (r) adev->asic_reset_res = r; } else { - r = amdgpu_do_asic_reset(device_list_handle, reset_context); + r = amdgpu_do_asic_reset(device_list, reset_context); if (r && r == -EAGAIN) goto retry; } - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + list_for_each_entry(tmp_adev, device_list, reset_list) { /* * Drop any pending non scheduler resets queued before reset is done. * Any reset scheduled after this point would be valid. Scheduler resets @@ -6141,10 +6176,18 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. 
*/ amdgpu_device_stop_pending_resets(tmp_adev); } -skip_hw_reset: + return r; +} + +static int amdgpu_device_sched_resume(struct list_head *device_list, + struct amdgpu_reset_context *reset_context, + bool job_signaled) +{ + struct amdgpu_device *tmp_adev = NULL; + int i, r = 0; /* Post ASIC reset for all devs .*/ - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + list_for_each_entry(tmp_adev, device_list, reset_list) { for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; @@ -6180,8 +6223,16 @@ skip_hw_reset: } } -skip_sched_resume: - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + return r; +} + +static void amdgpu_device_gpu_resume(struct amdgpu_device *adev, + struct list_head *device_list, + bool need_emergency_restart) +{ + struct amdgpu_device *tmp_adev = NULL; + + list_for_each_entry(tmp_adev, device_list, reset_list) { /* unlock kfd: SRIOV would do it separately */ if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev)) amdgpu_amdkfd_post_reset(tmp_adev); @@ -6192,18 +6243,114 @@ skip_sched_resume: if (!adev->kfd.init_complete) amdgpu_amdkfd_device_init(adev); - if (audio_suspended) + if (tmp_adev->pcie_reset_ctx.audio_suspended) amdgpu_device_resume_display_audio(tmp_adev); amdgpu_device_unset_mp1_state(tmp_adev); amdgpu_ras_set_error_query_ready(tmp_adev, true); + } - tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, + tmp_adev = list_first_entry(device_list, struct amdgpu_device, reset_list); amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); +} + + +/** + * amdgpu_device_gpu_recover - reset the asic and recover scheduler + * + * @adev: amdgpu_device pointer + * @job: which job trigger hang + * @reset_context: amdgpu reset context pointer + * + * Attempt to reset the GPU if it has hung (all asics). + * Attempt to do soft-reset or full-reset and reinitialize Asic + * Returns 0 for success or an error on failure. + */ + +int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + struct amdgpu_job *job, + struct amdgpu_reset_context *reset_context) +{ + struct list_head device_list; + bool job_signaled = false; + struct amdgpu_hive_info *hive = NULL; + int r = 0; + bool need_emergency_restart = false; + + /* + * If it reaches here because of hang/timeout and a RAS error is + * detected at the same time, let RAS recovery take care of it. + */ + if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) && + !amdgpu_sriov_vf(adev) && + reset_context->src != AMDGPU_RESET_SRC_RAS) { + dev_dbg(adev->dev, + "Gpu recovery from source: %d yielding to RAS error recovery handling", + reset_context->src); + return 0; + } + + /* + * Special case: RAS triggered and full reset isn't supported + */ + need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); + + /* + * Flush RAM to disk so that after reboot + * the user can read log and see why the system rebooted. + */ + if (need_emergency_restart && amdgpu_ras_get_context(adev) && + amdgpu_ras_get_context(adev)->reboot) { + DRM_WARN("Emergency reboot."); + + ksys_sync_helper(); + emergency_restart(); + } + + dev_info(adev->dev, "GPU %s begin!\n", + need_emergency_restart ? 
"jobs stop":"reset"); + + if (!amdgpu_sriov_vf(adev)) + hive = amdgpu_get_xgmi_hive(adev); + if (hive) + mutex_lock(&hive->hive_lock); + + reset_context->job = job; + reset_context->hive = hive; + INIT_LIST_HEAD(&device_list); + + r = amdgpu_device_halt_activities(adev, job, reset_context, &device_list, + hive, need_emergency_restart); + if (r) + goto end_reset; + + if (need_emergency_restart) + goto skip_sched_resume; + /* + * Must check guilty signal here since after this point all old + * HW fences are force signaled. + * + * job->base holds a reference to parent fence + */ + if (job && dma_fence_is_signaled(&job->hw_fence)) { + job_signaled = true; + dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); + goto skip_hw_reset; + } + + r = amdgpu_device_asic_reset(adev, &device_list, reset_context); + if (r) + goto end_reset; +skip_hw_reset: + r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled); + if (r) + goto end_reset; +skip_sched_resume: + amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart); end_reset: if (hive) { mutex_unlock(&hive->hive_lock); @@ -6587,12 +6734,15 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta { struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); - int i; + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + struct amdgpu_reset_context reset_context; + struct list_head device_list; + int r = 0; - DRM_INFO("PCI error: detected callback, state(%d)!!\n", state); + dev_info(adev->dev, "PCI error: detected callback!!\n"); - if (adev->gmc.xgmi.num_physical_nodes > 1) { - DRM_WARN("No support for XGMI hive yet..."); + if (!amdgpu_dpm_is_link_reset_supported(adev)) { + dev_warn(adev->dev, "No support for XGMI hive yet...\n"); return PCI_ERS_RESULT_DISCONNECT; } @@ -6600,32 +6750,30 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta switch (state) { case pci_channel_io_normal: + dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state); return PCI_ERS_RESULT_CAN_RECOVER; - /* Fatal error, prepare for slot reset */ case pci_channel_io_frozen: - /* - * Locking adev->reset_domain->sem will prevent any external access - * to GPU during PCI error recovery - */ - amdgpu_device_lock_reset_domain(adev->reset_domain); - amdgpu_device_set_mp1_state(adev); - - /* - * Block any work scheduling as we do for regular GPU reset - * for the duration of the recovery - */ - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; - - if (!amdgpu_ring_sched_ready(ring)) - continue; - - drm_sched_stop(&ring->sched, NULL); + /* Fatal error, prepare for slot reset */ + dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state); + + if (hive) + mutex_lock(&hive->hive_lock); + adev->pcie_reset_ctx.occurs_dpc = true; + memset(&reset_context, 0, sizeof(reset_context)); + INIT_LIST_HEAD(&device_list); + + r = amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list, + hive, false); + if (hive) { + mutex_unlock(&hive->hive_lock); + amdgpu_put_xgmi_hive(hive); } - atomic_inc(&adev->gpu_reset_counter); + if (r) + return PCI_ERS_RESULT_DISCONNECT; return PCI_ERS_RESULT_NEED_RESET; case pci_channel_io_perm_failure: /* Permanent error, prepare for device removal */ + dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state); return PCI_ERS_RESULT_DISCONNECT; } @@ -6638,8 +6786,10 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, 
pci_channel_sta */ pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev) { + struct drm_device *dev = pci_get_drvdata(pdev); + struct amdgpu_device *adev = drm_to_adev(dev); - DRM_INFO("PCI error: mmio enabled callback!!\n"); + dev_info(adev->dev, "PCI error: mmio enabled callback!!\n"); /* TODO - dump whatever for debugging purposes */ @@ -6663,10 +6813,12 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); - int r, i; struct amdgpu_reset_context reset_context; - u32 memsize; + struct amdgpu_device *tmp_adev; + struct amdgpu_hive_info *hive; struct list_head device_list; + int r = 0, i; + u32 memsize; /* PCI error slot reset should be skipped During RAS recovery */ if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || @@ -6674,15 +6826,12 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) amdgpu_ras_in_recovery(adev)) return PCI_ERS_RESULT_RECOVERED; - DRM_INFO("PCI error: slot reset callback!!\n"); + dev_info(adev->dev, "PCI error: slot reset callback!!\n"); memset(&reset_context, 0, sizeof(reset_context)); - INIT_LIST_HEAD(&device_list); - list_add_tail(&adev->reset_list, &device_list); - /* wait for asic to come out of reset */ - msleep(500); + msleep(700); /* Restore PCI confspace */ amdgpu_device_load_pci_state(pdev); @@ -6703,26 +6852,40 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); - - adev->no_hw_access = true; - r = amdgpu_device_pre_asic_reset(adev, &reset_context); - adev->no_hw_access = false; - if (r) - goto out; + set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); + INIT_LIST_HEAD(&device_list); - r = amdgpu_do_asic_reset(&device_list, &reset_context); + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + mutex_lock(&hive->hive_lock); + reset_context.hive = hive; + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + tmp_adev->pcie_reset_ctx.in_link_reset = true; + list_add_tail(&tmp_adev->reset_list, &device_list); + } + } else { + set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); + list_add_tail(&adev->reset_list, &device_list); + } + r = amdgpu_device_asic_reset(adev, &device_list, &reset_context); out: if (!r) { if (amdgpu_device_cache_pci_state(adev->pdev)) pci_restore_state(adev->pdev); - - DRM_INFO("PCIe error recovery succeeded\n"); + dev_info(adev->dev, "PCIe error recovery succeeded\n"); } else { - DRM_ERROR("PCIe error recovery failed, err:%d", r); - amdgpu_device_unset_mp1_state(adev); - amdgpu_device_unlock_reset_domain(adev->reset_domain); + dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r); + if (hive) { + list_for_each_entry(tmp_adev, &device_list, reset_list) + amdgpu_device_unset_mp1_state(tmp_adev); + amdgpu_device_unlock_reset_domain(adev->reset_domain); + } + } + + if (hive) { + mutex_unlock(&hive->hive_lock); + amdgpu_put_xgmi_hive(hive); } return r ? 
PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; @@ -6739,26 +6902,36 @@ void amdgpu_pci_resume(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); - int i; - + struct list_head device_list; + struct amdgpu_hive_info *hive = NULL; + struct amdgpu_device *tmp_adev = NULL; - DRM_INFO("PCI error: resume callback!!\n"); + dev_info(adev->dev, "PCI error: resume callback!!\n"); /* Only continue execution for the case of pci_channel_io_frozen */ if (adev->pci_channel_state != pci_channel_io_frozen) return; - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; + INIT_LIST_HEAD(&device_list); - if (!amdgpu_ring_sched_ready(ring)) - continue; + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + mutex_lock(&hive->hive_lock); + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + tmp_adev->pcie_reset_ctx.in_link_reset = false; + list_add_tail(&tmp_adev->reset_list, &device_list); + } + } else + list_add_tail(&adev->reset_list, &device_list); - drm_sched_start(&ring->sched, 0); - } + amdgpu_device_sched_resume(&device_list, NULL, NULL); + amdgpu_device_gpu_resume(adev, &device_list, false); + adev->pcie_reset_ctx.occurs_dpc = false; - amdgpu_device_unset_mp1_state(adev); - amdgpu_device_unlock_reset_domain(adev->reset_domain); + if (hive) { + mutex_unlock(&hive->hive_lock); + amdgpu_put_xgmi_hive(hive); + } } bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 24ee4710f807..4ddd08ce8885 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -51,6 +51,8 @@ #include "amdgpu_reset.h" #include "amdgpu_sched.h" #include "amdgpu_xgmi.h" +#include "amdgpu_userq.h" +#include "amdgpu_userq_fence.h" #include "../amdxcp/amdgpu_xcp_drv.h" /* @@ -123,9 +125,10 @@ * - 3.61.0 - Contains fix for RV/PCO compute queues * - 3.62.0 - Add AMDGPU_IDS_FLAGS_MODE_PF, AMDGPU_IDS_FLAGS_MODE_VF & AMDGPU_IDS_FLAGS_MODE_PT * - 3.63.0 - GFX12 display DCC supports 256B max compressed block size + * - 3.64.0 - Userq IP support query */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 63 +#define KMS_DRIVER_MINOR 64 #define KMS_DRIVER_PATCHLEVEL 0 /* @@ -140,6 +143,7 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5), AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6), AMDGPU_DEBUG_SMU_POOL = BIT(7), + AMDGPU_DEBUG_VM_USERPTR = BIT(8), }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -176,7 +180,7 @@ uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu; char *amdgpu_virtual_display; -bool enforce_isolation; +int amdgpu_enforce_isolation = -1; int amdgpu_modeset = -1; /* Specifies the default granularity for SVM, used in buffer @@ -238,6 +242,8 @@ int amdgpu_agp = -1; /* auto */ int amdgpu_wbrf = -1; int amdgpu_damage_clips = -1; /* auto */ int amdgpu_umsch_mm_fwlog; +int amdgpu_rebar = -1; /* auto */ +int amdgpu_user_queue = -1; DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, "DRM_UT_CORE", @@ -1033,11 +1039,13 @@ module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444); /** - * DOC: enforce_isolation (bool) - * enforce process isolation between graphics and compute via using the same reserved vmid. + * DOC: enforce_isolation (int) + * enforce process isolation between graphics and compute. 
+ * (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader) */ -module_param(enforce_isolation, bool, 0444); -MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on"); +module_param_named(enforce_isolation, amdgpu_enforce_isolation, int, 0444); +MODULE_PARM_DESC(enforce_isolation, +"enforce process isolation between graphics and compute. (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader)"); /** * DOC: modeset (int) @@ -1096,6 +1104,28 @@ MODULE_PARM_DESC(wbrf, "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)"); module_param_named(wbrf, amdgpu_wbrf, int, 0444); +/** + * DOC: rebar (int) + * Allow BAR resizing. Disable this to prevent the driver from attempting + * to resize the BAR if the GPU supports it and there is available MMIO space. + * Note that this just prevents the driver from resizing the BAR. The BIOS + * may have already resized the BAR at boot time. + */ +MODULE_PARM_DESC(rebar, "Resizable BAR (-1 = auto (default), 0 = disable, 1 = enable)"); +module_param_named(rebar, amdgpu_rebar, int, 0444); + +/** + * DOC: user_queue (int) + * Enable user queues on systems that support user queues. Possible values: + * + * - -1 = auto (ASIC specific default) + * - 0 = user queues disabled + * - 1 = user queues enabled and kernel queues enabled (if supported) + * - 2 = user queues enabled and kernel queues disabled + */ +MODULE_PARM_DESC(user_queue, "Enable user queues (-1 = auto (default), 0 = disable, 1 = enable, 2 = enable UQs and disable KQs)"); +module_param_named(user_queue, amdgpu_user_queue, int, 0444); + /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers */ @@ -2244,6 +2274,10 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: use vram for smu pool\n"); adev->pm.smu_debug_mask |= SMU_DEBUG_POOL_USE_VRAM; } + if (amdgpu_debug_mask & AMDGPU_DEBUG_VM_USERPTR) { + pr_info("debug: VM mode debug for userptr is enabled\n"); + adev->debug_vm_userptr = true; + } } static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) @@ -2615,13 +2649,8 @@ static int amdgpu_pmops_freeze(struct device *dev) static int amdgpu_pmops_thaw(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct amdgpu_device *adev = drm_to_adev(drm_dev); - int r; - r = amdgpu_device_resume(drm_dev, true); - adev->in_s4 = false; - - return r; + return amdgpu_device_resume(drm_dev, true); } static int amdgpu_pmops_poweroff(struct device *dev) @@ -2634,9 +2663,6 @@ static int amdgpu_pmops_poweroff(struct device *dev) static int amdgpu_pmops_restore(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct amdgpu_device *adev = drm_to_adev(drm_dev); - - adev->in_s4 = false; return amdgpu_device_resume(drm_dev, true); } @@ -2708,6 +2734,29 @@ static int amdgpu_runtime_idle_check_display(struct device *dev) return 0; } +static int amdgpu_runtime_idle_check_userq(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0; + + mutex_lock(&adev->userq_mutex); + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + idr_for_each_entry(&uqm->userq_idr, queue, 
queue_id) { + ret = -EBUSY; + goto done; + } + } +done: + mutex_unlock(&adev->userq_mutex); + + return ret; +} + static int amdgpu_pmops_runtime_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -2723,6 +2772,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) ret = amdgpu_runtime_idle_check_display(dev); if (ret) return ret; + ret = amdgpu_runtime_idle_check_userq(dev); + if (ret) + return ret; /* wait for all rings to drain before suspending */ for (i = 0; i < AMDGPU_MAX_RINGS; i++) { @@ -2844,12 +2896,30 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) } ret = amdgpu_runtime_idle_check_display(dev); + if (ret) + goto done; + ret = amdgpu_runtime_idle_check_userq(dev); +done: pm_runtime_mark_last_busy(dev); pm_runtime_autosuspend(dev); return ret; } +static int amdgpu_drm_release(struct inode *inode, struct file *filp) +{ + struct drm_file *file_priv = filp->private_data; + struct amdgpu_fpriv *fpriv = file_priv->driver_priv; + + if (fpriv) { + fpriv->evf_mgr.fd_closing = true; + amdgpu_userq_mgr_fini(&fpriv->userq_mgr); + amdgpu_eviction_fence_destroy(&fpriv->evf_mgr); + } + + return drm_release(inode, filp); +} + long amdgpu_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -2901,7 +2971,7 @@ static const struct file_operations amdgpu_driver_kms_fops = { .owner = THIS_MODULE, .open = drm_open, .flush = amdgpu_flush, - .release = drm_release, + .release = amdgpu_drm_release, .unlocked_ioctl = amdgpu_drm_ioctl, .mmap = drm_gem_mmap, .poll = drm_poll, @@ -2948,6 +3018,9 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), }; static const struct drm_driver amdgpu_kms_driver = { @@ -3038,6 +3111,10 @@ static int __init amdgpu_init(void) if (r) goto error_fence; + r = amdgpu_userq_fence_slab_init(); + if (r) + goto error_fence; + DRM_INFO("amdgpu kernel modesetting enabled.\n"); amdgpu_register_atpx_handler(); amdgpu_acpi_detect(); @@ -3069,6 +3146,7 @@ static void __exit amdgpu_exit(void) amdgpu_acpi_release(); amdgpu_sync_fini(); amdgpu_fence_slab_fini(); + amdgpu_userq_fence_slab_fini(); mmu_notifier_synchronize(); amdgpu_xcp_drv_release(); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c new file mode 100644 index 000000000000..1a7469543db5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2024 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <linux/sched.h> +#include <drm/drm_exec.h> +#include "amdgpu.h" + +#define work_to_evf_mgr(w, name) container_of(w, struct amdgpu_eviction_fence_mgr, name) +#define evf_mgr_to_fpriv(e) container_of(e, struct amdgpu_fpriv, evf_mgr) + +static const char * +amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence) +{ + return "amdgpu_eviction_fence"; +} + +static const char * +amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f) +{ + struct amdgpu_eviction_fence *ef; + + ef = container_of(f, struct amdgpu_eviction_fence, base); + return ef->timeline_name; +} + +int +amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct drm_exec *exec) +{ + struct amdgpu_eviction_fence *old_ef, *new_ef; + struct drm_gem_object *obj; + unsigned long index; + int ret; + + if (evf_mgr->ev_fence && + !dma_fence_is_signaled(&evf_mgr->ev_fence->base)) + return 0; + /* + * Steps to replace eviction fence: + * * lock all objects in exec (caller) + * * create a new eviction fence + * * update new eviction fence in evf_mgr + * * attach the new eviction fence to BOs + * * release the old fence + * * unlock the objects (caller) + */ + new_ef = amdgpu_eviction_fence_create(evf_mgr); + if (!new_ef) { + DRM_ERROR("Failed to create new eviction fence\n"); + return -ENOMEM; + } + + /* Update the eviction fence now */ + spin_lock(&evf_mgr->ev_fence_lock); + old_ef = evf_mgr->ev_fence; + evf_mgr->ev_fence = new_ef; + spin_unlock(&evf_mgr->ev_fence_lock); + + /* Attach the new fence */ + drm_exec_for_each_locked_object(exec, index, obj) { + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + if (!bo) + continue; + ret = amdgpu_eviction_fence_attach(evf_mgr, bo); + if (ret) { + DRM_ERROR("Failed to attch new eviction fence\n"); + goto free_err; + } + } + + /* Free old fence */ + if (old_ef) + dma_fence_put(&old_ef->base); + return 0; + +free_err: + kfree(new_ef); + return ret; +} + +static void +amdgpu_eviction_fence_suspend_worker(struct work_struct *work) +{ + struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work); + struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr); + struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; + struct amdgpu_eviction_fence *ev_fence; + + mutex_lock(&uq_mgr->userq_mutex); + ev_fence = evf_mgr->ev_fence; + if (!ev_fence) + goto unlock; + + amdgpu_userq_evict(uq_mgr, ev_fence); + +unlock: + mutex_unlock(&uq_mgr->userq_mutex); +} + 
+static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f) +{ + struct amdgpu_eviction_fence_mgr *evf_mgr; + struct amdgpu_eviction_fence *ev_fence; + + if (!f) + return true; + + ev_fence = to_ev_fence(f); + evf_mgr = ev_fence->evf_mgr; + + schedule_delayed_work(&evf_mgr->suspend_work, 0); + return true; +} + +static const struct dma_fence_ops amdgpu_eviction_fence_ops = { + .use_64bit_seqno = true, + .get_driver_name = amdgpu_eviction_fence_get_driver_name, + .get_timeline_name = amdgpu_eviction_fence_get_timeline_name, + .enable_signaling = amdgpu_eviction_fence_enable_signaling, +}; + +void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_eviction_fence *ev_fence) +{ + spin_lock(&evf_mgr->ev_fence_lock); + dma_fence_signal(&ev_fence->base); + spin_unlock(&evf_mgr->ev_fence_lock); +} + +struct amdgpu_eviction_fence * +amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + + ev_fence = kzalloc(sizeof(*ev_fence), GFP_KERNEL); + if (!ev_fence) + return NULL; + + ev_fence->evf_mgr = evf_mgr; + get_task_comm(ev_fence->timeline_name, current); + spin_lock_init(&ev_fence->lock); + dma_fence_init(&ev_fence->base, &amdgpu_eviction_fence_ops, + &ev_fence->lock, evf_mgr->ev_fence_ctx, + atomic_inc_return(&evf_mgr->ev_fence_seq)); + return ev_fence; +} + +void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + + /* Wait for any pending work to execute */ + flush_delayed_work(&evf_mgr->suspend_work); + + spin_lock(&evf_mgr->ev_fence_lock); + ev_fence = evf_mgr->ev_fence; + spin_unlock(&evf_mgr->ev_fence_lock); + + if (!ev_fence) + return; + + dma_fence_wait(&ev_fence->base, false); + + /* Last unref of ev_fence */ + dma_fence_put(&evf_mgr->ev_fence->base); +} + +int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo) +{ + struct amdgpu_eviction_fence *ev_fence; + struct dma_resv *resv = bo->tbo.base.resv; + int ret; + + if (!resv) + return 0; + + ret = dma_resv_reserve_fences(resv, 1); + if (ret) { + DRM_DEBUG_DRIVER("Failed to resv fence space\n"); + return ret; + } + + spin_lock(&evf_mgr->ev_fence_lock); + ev_fence = evf_mgr->ev_fence; + if (ev_fence) + dma_resv_add_fence(resv, &ev_fence->base, DMA_RESV_USAGE_BOOKKEEP); + spin_unlock(&evf_mgr->ev_fence_lock); + + return 0; +} + +void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo) +{ + struct dma_fence *stub = dma_fence_get_stub(); + + dma_resv_replace_fences(bo->tbo.base.resv, evf_mgr->ev_fence_ctx, + stub, DMA_RESV_USAGE_BOOKKEEP); + dma_fence_put(stub); +} + +int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + /* This needs to be done one time per open */ + atomic_set(&evf_mgr->ev_fence_seq, 0); + evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1); + spin_lock_init(&evf_mgr->ev_fence_lock); + + INIT_DELAYED_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h new file mode 100644 index 000000000000..fcd867b7147d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef AMDGPU_EV_FENCE_H_ +#define AMDGPU_EV_FENCE_H_ + +struct amdgpu_eviction_fence { + struct dma_fence base; + spinlock_t lock; + char timeline_name[TASK_COMM_LEN]; + struct amdgpu_eviction_fence_mgr *evf_mgr; +}; + +struct amdgpu_eviction_fence_mgr { + u64 ev_fence_ctx; + atomic_t ev_fence_seq; + spinlock_t ev_fence_lock; + struct amdgpu_eviction_fence *ev_fence; + struct delayed_work suspend_work; + uint8_t fd_closing; +}; + +/* Eviction fence helper functions */ +struct amdgpu_eviction_fence * +amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr); + +void +amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr); + +int +amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo); + +void +amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo); + +int +amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr); + +void +amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_eviction_fence *ev_fence); + +int +amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct drm_exec *exec); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 69429df09477..2c68118fe9fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -36,6 +36,7 @@ #include <drm/drm_exec.h> #include <drm/drm_gem_ttm_helper.h> #include <drm/ttm/ttm_tt.h> +#include <drm/drm_syncobj.h> #include "amdgpu.h" #include "amdgpu_display.h" @@ -44,6 +45,114 @@ #include "amdgpu_xgmi.h" #include "amdgpu_vm.h" +static int +amdgpu_gem_add_input_fence(struct drm_file *filp, + uint64_t syncobj_handles_array, + uint32_t num_syncobj_handles) +{ + struct dma_fence *fence; + uint32_t *syncobj_handles; + int ret, i; + + if (!num_syncobj_handles) + return 0; + + syncobj_handles = memdup_user(u64_to_user_ptr(syncobj_handles_array), + sizeof(uint32_t) * num_syncobj_handles); + if (IS_ERR(syncobj_handles)) + return PTR_ERR(syncobj_handles); + + for (i = 0; i < num_syncobj_handles; i++) { + + if (!syncobj_handles[i]) { + ret = -EINVAL; + goto free_memdup; + } + + ret = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 0, &fence); + if (ret) + goto free_memdup; + + dma_fence_wait(fence, false); + + /* TODO: optimize async handling */ + dma_fence_put(fence); + } + 
+free_memdup: + kfree(syncobj_handles); + return ret; +} + +static int +amdgpu_gem_update_timeline_node(struct drm_file *filp, + uint32_t syncobj_handle, + uint64_t point, + struct drm_syncobj **syncobj, + struct dma_fence_chain **chain) +{ + if (!syncobj_handle) + return 0; + + /* Find the sync object */ + *syncobj = drm_syncobj_find(filp, syncobj_handle); + if (!*syncobj) + return -ENOENT; + + if (!point) + return 0; + + /* Allocate the chain node */ + *chain = dma_fence_chain_alloc(); + if (!*chain) { + drm_syncobj_put(*syncobj); + return -ENOMEM; + } + + return 0; +} + +static void +amdgpu_gem_update_bo_mapping(struct drm_file *filp, + struct amdgpu_bo_va *bo_va, + uint32_t operation, + uint64_t point, + struct dma_fence *fence, + struct drm_syncobj *syncobj, + struct dma_fence_chain *chain) +{ + struct amdgpu_bo *bo = bo_va ? bo_va->base.bo : NULL; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; + struct dma_fence *last_update; + + if (!syncobj) + return; + + /* Find the last update fence */ + switch (operation) { + case AMDGPU_VA_OP_MAP: + case AMDGPU_VA_OP_REPLACE: + if (bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)) + last_update = vm->last_update; + else + last_update = bo_va->last_pt_update; + break; + case AMDGPU_VA_OP_UNMAP: + case AMDGPU_VA_OP_CLEAR: + last_update = fence; + break; + default: + return; + } + + /* Add fence to timeline */ + if (!point) + drm_syncobj_replace_fence(syncobj, last_update); + else + drm_syncobj_add_point(syncobj, chain, last_update, point); +} + static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf) { struct ttm_buffer_object *bo = vmf->vma->vm_private_data; @@ -184,6 +293,15 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, bo_va = amdgpu_vm_bo_add(adev, vm, abo); else ++bo_va->ref_count; + + /* attach gfx eviction fence */ + r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo); + if (r) { + DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n"); + amdgpu_bo_unreserve(abo); + return r; + } + amdgpu_bo_unreserve(abo); /* Validate and add eviction fence to DMABuf imports with dynamic @@ -247,6 +365,9 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, goto out_unlock; } + if (!amdgpu_vm_is_bo_always_valid(vm, bo)) + amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo); + bo_va = amdgpu_vm_bo_find(vm, bo); if (!bo_va || --bo_va->ref_count) goto out_unlock; @@ -321,10 +442,6 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, uint32_t handle, initial_domain; int r; - /* reject DOORBELLs until userspace code to use it is available */ - if (args->in.domains & AMDGPU_GEM_DOMAIN_DOORBELL) - return -EINVAL; - /* reject invalid gem flags */ if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | @@ -638,18 +755,23 @@ out: * * Update the bo_va directly after setting its address. Errors are not * vital here, so they are not reported back to userspace. + * + * Returns resulting fence if freed BO(s) got cleared from the PT. + * otherwise stub fence in case of error. 
*/ -static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_bo_va *bo_va, - uint32_t operation) +static struct dma_fence * +amdgpu_gem_va_update_vm(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_bo_va *bo_va, + uint32_t operation) { + struct dma_fence *fence = dma_fence_get_stub(); int r; if (!amdgpu_vm_ready(vm)) - return; + return fence; - r = amdgpu_vm_clear_freed(adev, vm, NULL); + r = amdgpu_vm_clear_freed(adev, vm, &fence); if (r) goto error; @@ -665,6 +787,8 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, error: if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); + + return fence; } /** @@ -713,6 +837,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_bo *abo; struct amdgpu_bo_va *bo_va; + struct drm_syncobj *timeline_syncobj = NULL; + struct dma_fence_chain *timeline_chain = NULL; + struct dma_fence *fence; struct drm_exec exec; uint64_t va_flags; uint64_t vm_size; @@ -774,6 +901,12 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, abo = NULL; } + r = amdgpu_gem_add_input_fence(filp, + args->input_fence_syncobj_handles, + args->num_syncobj_handles); + if (r) + goto error_put_gobj; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { @@ -802,6 +935,14 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, bo_va = NULL; } + r = amdgpu_gem_update_timeline_node(filp, + args->vm_timeline_syncobj_out, + args->vm_timeline_point, + &timeline_syncobj, + &timeline_chain); + if (r) + goto error; + switch (args->operation) { case AMDGPU_VA_OP_MAP: va_flags = amdgpu_gem_va_map_flags(adev, args->flags); @@ -827,12 +968,24 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, default: break; } - if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) - amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, - args->operation); + if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) { + fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, + args->operation); + + if (timeline_syncobj) + amdgpu_gem_update_bo_mapping(filp, bo_va, + args->operation, + args->vm_timeline_point, + fence, timeline_syncobj, + timeline_chain); + else + dma_fence_put(fence); + + } error: drm_exec_fini(&exec); +error_put_gobj: drm_gem_object_put(gobj); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index cf2df7790077..1db1e6ec0184 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -33,6 +33,7 @@ #include "amdgpu_reset.h" #include "amdgpu_xcp.h" #include "amdgpu_xgmi.h" +#include "nvd.h" /* delay 0.1 second to enable gfx off feature */ #define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) @@ -74,14 +75,15 @@ bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, adev->gfx.mec_bitmap[xcc_id].queue_bitmap); } -int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, - int me, int pipe, int queue) +static int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, + int me, int pipe, int queue) { + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ int bit = 0; bit += me * adev->gfx.me.num_pipe_per_me - * adev->gfx.me.num_queue_per_pipe; - bit += pipe * adev->gfx.me.num_queue_per_pipe; + * num_queue_per_pipe; + bit += pipe * num_queue_per_pipe; bit += queue; return bit; @@ -238,8 +240,8 @@ void 
amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) { int i, queue, pipe; bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev); - int max_queues_per_me = adev->gfx.me.num_pipe_per_me * - adev->gfx.me.num_queue_per_pipe; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ + int max_queues_per_me = adev->gfx.me.num_pipe_per_me * num_queue_per_pipe; if (multipipe_policy) { /* policy: amdgpu owns the first queue per pipe at this stage @@ -247,9 +249,9 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) for (i = 0; i < max_queues_per_me; i++) { pipe = i % adev->gfx.me.num_pipe_per_me; queue = (i / adev->gfx.me.num_pipe_per_me) % - adev->gfx.me.num_queue_per_pipe; + num_queue_per_pipe; - set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue, + set_bit(pipe * num_queue_per_pipe + queue, adev->gfx.me.queue_bitmap); } } else { @@ -258,8 +260,9 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) } /* update the number of active graphics rings */ - adev->gfx.num_gfx_rings = - bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); + if (adev->gfx.num_gfx_rings) + adev->gfx.num_gfx_rings = + bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); } static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, @@ -1351,6 +1354,10 @@ static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev, struct amdgpu_device *adev = drm_to_adev(ddev); int mode; + /* Only minimal precaution taken to reject requests while in reset.*/ + if (amdgpu_in_reset(adev)) + return -EPERM; + mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr, AMDGPU_XCP_FL_NONE); @@ -1394,8 +1401,14 @@ static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev, return -EINVAL; } + /* Don't allow a switch while under reset */ + if (!down_read_trylock(&adev->reset_domain->sem)) + return -EPERM; + ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode); + up_read(&adev->reset_domain->sem); + if (ret) return ret; @@ -1466,6 +1479,8 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) goto err; job->enforce_isolation = true; + /* always run the cleaner shader */ + job->run_cleaner_shader = true; ib = &job->ibs[0]; for (i = 0; i <= ring->funcs->align_mask; ++i) @@ -1552,6 +1567,9 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, if (adev->in_suspend && !adev->in_runpm) return -EPERM; + if (adev->gfx.disable_kq) + return -EPERM; + ret = kstrtol(buf, 0, &value); if (ret) @@ -1594,7 +1612,8 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, * Provides the sysfs read interface to get the current settings of the 'enforce_isolation' * feature for each GPU partition. Reading from the 'enforce_isolation' * sysfs file returns the isolation settings for all partitions, where '0' - * indicates disabled and '1' indicates enabled. + * indicates disabled, '1' indicates enabled, and '2' indicates enabled in legacy mode, + * and '3' indicates enabled without cleaner shader. * * Return: The number of bytes read from the sysfs file. */ @@ -1629,9 +1648,12 @@ static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, * @count: The size of the input data * * This function allows control over the 'enforce_isolation' feature, which - * serializes access to the graphics engine. Writing '1' or '0' to the - * 'enforce_isolation' sysfs file enables or disables process isolation for - * each partition. The input should specify the setting for all partitions. 
+ * serializes access to the graphics engine. Writing '0' to disable, '1' to + * enable isolation with cleaner shader, '2' to enable legacy isolation without + * cleaner shader, or '3' to enable process isolation without submitting the + * cleaner shader to the 'enforce_isolation' sysfs file sets the isolation mode + * for each partition. The input should specify the setting for all + * partitions. * * Return: The number of bytes written to the sysfs file. */ @@ -1668,13 +1690,34 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, return -EINVAL; for (i = 0; i < num_partitions; i++) { - if (partition_values[i] != 0 && partition_values[i] != 1) + if (partition_values[i] != 0 && + partition_values[i] != 1 && + partition_values[i] != 2 && + partition_values[i] != 3) return -EINVAL; } mutex_lock(&adev->enforce_isolation_mutex); - for (i = 0; i < num_partitions; i++) - adev->enforce_isolation[i] = partition_values[i]; + for (i = 0; i < num_partitions; i++) { + switch (partition_values[i]) { + case 0: + default: + adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE; + break; + case 1: + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE; + break; + case 2: + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY; + break; + case 3: + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER; + break; + } + } mutex_unlock(&adev->enforce_isolation_mutex); amdgpu_mes_update_enforce_isolation(adev); @@ -1923,39 +1966,41 @@ void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx, bool enable) { - mutex_lock(&adev->gfx.kfd_sch_mutex); + mutex_lock(&adev->gfx.userq_sch_mutex); if (enable) { /* If the count is already 0, it means there's an imbalance bug somewhere. * Note that the bug may be in a different caller than the one which triggers the * WARN_ON_ONCE. 
*/ - if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) { + if (WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx] == 0)) { dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n"); goto unlock; } - adev->gfx.kfd_sch_req_count[idx]--; + adev->gfx.userq_sch_req_count[idx]--; - if (adev->gfx.kfd_sch_req_count[idx] == 0 && - adev->gfx.kfd_sch_inactive[idx]) { + if (adev->gfx.userq_sch_req_count[idx] == 0 && + adev->gfx.userq_sch_inactive[idx]) { schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx])); } } else { - if (adev->gfx.kfd_sch_req_count[idx] == 0) { + if (adev->gfx.userq_sch_req_count[idx] == 0) { cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work); - if (!adev->gfx.kfd_sch_inactive[idx]) { - amdgpu_amdkfd_stop_sched(adev, idx); - adev->gfx.kfd_sch_inactive[idx] = true; + if (!adev->gfx.userq_sch_inactive[idx]) { + amdgpu_userq_stop_sched_for_enforce_isolation(adev, idx); + if (adev->kfd.init_complete) + amdgpu_amdkfd_stop_sched(adev, idx); + adev->gfx.userq_sch_inactive[idx] = true; } } - adev->gfx.kfd_sch_req_count[idx]++; + adev->gfx.userq_sch_req_count[idx]++; } unlock: - mutex_unlock(&adev->gfx.kfd_sch_mutex); + mutex_unlock(&adev->gfx.userq_sch_mutex); } /** @@ -2000,12 +2045,13 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) msecs_to_jiffies(1)); } else { /* Tell KFD to resume the runqueue */ - if (adev->kfd.init_complete) { - WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]); - WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]); + WARN_ON_ONCE(!adev->gfx.userq_sch_inactive[idx]); + WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx]); + + amdgpu_userq_start_sched_for_enforce_isolation(adev, idx); + if (adev->kfd.init_complete) amdgpu_amdkfd_start_sched(adev, idx); - adev->gfx.kfd_sch_inactive[idx] = false; - } + adev->gfx.userq_sch_inactive[idx] = false; } mutex_unlock(&adev->enforce_isolation_mutex); } @@ -2029,7 +2075,7 @@ amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev, bool wait = false; mutex_lock(&adev->enforce_isolation_mutex); - if (adev->enforce_isolation[idx]) { + if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) { /* set the initial values if nothing is set */ if (!adev->gfx.enforce_isolation_jiffies[idx]) { adev->gfx.enforce_isolation_jiffies[idx] = jiffies; @@ -2096,7 +2142,7 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx); mutex_lock(&adev->enforce_isolation_mutex); - if (adev->enforce_isolation[idx]) { + if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) { if (adev->kfd.init_complete) sched_work = true; } @@ -2133,7 +2179,7 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) return; mutex_lock(&adev->enforce_isolation_mutex); - if (adev->enforce_isolation[idx]) { + if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) { if (adev->kfd.init_complete) sched_work = true; } @@ -2217,6 +2263,74 @@ void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring) schedule_delayed_work(&ring->adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT); } +/** + * amdgpu_gfx_csb_preamble_start - Set CSB preamble start + * + * @buffer: This is an output variable that gets the PACKET3 preamble setup. + * + * Return: + * return the latest index. 
+ */ +u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer) +{ + u32 count = 0; + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + buffer[count++] = cpu_to_le32(0x80000000); + buffer[count++] = cpu_to_le32(0x80000000); + + return count; +} + +/** + * amdgpu_gfx_csb_data_parser - Parser CS data + * + * @adev: amdgpu_device pointer used to get the CS data and other gfx info. + * @buffer: This is an output variable that gets the PACKET3 preamble end. + * @count: Index to start set the preemble end. + * + * Return: + * return the latest index. + */ +u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, volatile u32 *buffer, u32 count) +{ + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + u32 i; + + for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) { + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); + buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START); + + for (i = 0; i < ext->reg_count; i++) + buffer[count++] = cpu_to_le32(ext->extent[i]); + } + } + } + + return count; +} + +/** + * amdgpu_gfx_csb_preamble_end - Set CSB preamble end + * + * @buffer: This is an output variable that gets the PACKET3 preamble end. + * @count: Index to start set the preemble end. + */ +void amdgpu_gfx_csb_preamble_end(volatile u32 *buffer, u32 count) +{ + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); + buffer[count++] = cpu_to_le32(0); +} + /* * debugfs for to enable/disable gfx job submission to specific core. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 87e862188766..08f268dab8f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -170,10 +170,46 @@ struct amdgpu_kiq { #define AMDGPU_GFX_MAX_SE 4 #define AMDGPU_GFX_MAX_SH_PER_SE 2 +/** + * amdgpu_rb_config - Configure a single Render Backend (RB) + * + * Bad RBs are fused off and there is a harvest register the driver reads to + * determine which RB(s) are fused off so that the driver can configure the + * hardware state so that nothing gets sent to them. There are also user + * harvest registers that the driver can program to disable additional RBs, + * etc., for testing purposes. + */ struct amdgpu_rb_config { + /** + * @rb_backend_disable: + * + * The value captured from register RB_BACKEND_DISABLE indicates if the + * RB backend is disabled or not. + */ uint32_t rb_backend_disable; + + /** + * @user_rb_backend_disable: + * + * The value captured from register USER_RB_BACKEND_DISABLE indicates + * if the User RB backend is disabled or not. + */ uint32_t user_rb_backend_disable; + + /** + * @raster_config: + * + * To set up all of the states, it is necessary to have two registers + * to keep all of the states. This field holds the first register. + */ uint32_t raster_config; + + /** + * @raster_config_1: + * + * To set up all of the states, it is necessary to have two registers + * to keep all of the states. This field holds the second register. 
+ */ uint32_t raster_config_1; }; @@ -221,6 +257,13 @@ struct amdgpu_gfx_config { uint32_t macrotile_mode_array[16]; struct gb_addr_config gb_addr_config_fields; + + /** + * @rb_config: + * + * Matrix that keeps all the Render Backend (color and depth buffer + * handling) configuration on the 3D engine. + */ struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE]; /* gfx configure feature */ @@ -305,7 +348,8 @@ struct amdgpu_gfx_funcs { void (*init_spm_golden)(struct amdgpu_device *adev); void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable); int (*get_gfx_shadow_info)(struct amdgpu_device *adev, - struct amdgpu_gfx_shadow_info *shadow_info); + struct amdgpu_gfx_shadow_info *shadow_info, + bool skip_check); enum amdgpu_gfx_partition (*query_partition_mode)(struct amdgpu_device *adev); int (*switch_partition_mode)(struct amdgpu_device *adev, @@ -474,9 +518,9 @@ struct amdgpu_gfx { bool enable_cleaner_shader; struct amdgpu_isolation_work enforce_isolation[MAX_XCP]; /* Mutex for synchronizing KFD scheduler operations */ - struct mutex kfd_sch_mutex; - u64 kfd_sch_req_count[MAX_XCP]; - bool kfd_sch_inactive[MAX_XCP]; + struct mutex userq_sch_mutex; + u64 userq_sch_req_count[MAX_XCP]; + bool userq_sch_inactive[MAX_XCP]; unsigned long enforce_isolation_jiffies[MAX_XCP]; unsigned long enforce_isolation_time[MAX_XCP]; @@ -484,6 +528,9 @@ struct amdgpu_gfx { struct delayed_work idle_work; bool workload_profile_active; struct mutex workload_profile_mutex; + + bool disable_kq; + bool disable_uq; }; struct amdgpu_gfx_ras_reg_entry { @@ -503,7 +550,7 @@ struct amdgpu_gfx_ras_mem_id_entry { #define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id))) #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid, xcc_id) ((adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid), (xcc_id))) #define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev)) -#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si))) +#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si), false)) /** * amdgpu_gfx_create_bitmask - create a bitmask @@ -550,8 +597,6 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); -int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me, - int pipe, int queue); bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, int pipe, int queue); void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); @@ -597,6 +642,9 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring); void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work); void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring); void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring); +u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer); +u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, volatile u32 *buffer, u32 count); +void amdgpu_gfx_csb_preamble_end(volatile u32 *buffer, u32 count); void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev); void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index ecb74ccf1d90..6b0fbbb91e57 100644 
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -1230,6 +1230,10 @@ static ssize_t current_memory_partition_show( struct amdgpu_device *adev = drm_to_adev(ddev); enum amdgpu_memory_partition mode; + /* Only minimal precaution taken to reject requests while in reset */ + if (amdgpu_in_reset(adev)) + return -EPERM; + mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); if ((mode >= ARRAY_SIZE(nps_desc)) || (BIT(mode) & AMDGPU_ALL_NPS_MASK) != BIT(mode)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c index b6cf801939aa..6e02fb9ac2f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c @@ -22,6 +22,7 @@ */ #include "amdgpu.h" #include "amdgpu_ras.h" +#include <uapi/linux/kfd_ioctl.h> int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev) { @@ -46,3 +47,22 @@ int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev) /* hdp ras follows amdgpu_ras_block_late_init_default for late init */ return 0; } + +void amdgpu_hdp_generic_flush(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) { + WREG32((adev->rmmio_remap.reg_offset + + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> + 2, + 0); + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); + } else { + amdgpu_ring_emit_wreg(ring, + (adev->rmmio_remap.reg_offset + + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> + 2, + 0); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h index 7b8a6152dc8d..4cfd932b7e91 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h @@ -44,4 +44,6 @@ struct amdgpu_hdp { }; int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev); +void amdgpu_hdp_generic_flush(struct amdgpu_device *adev, + struct amdgpu_ring *ring); #endif /* __AMDGPU_HDP_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 2ea98ec60220..802743efa3b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -163,12 +163,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, init_shadow = false; } - if (!ring->sched.ready && !ring->is_mes_queue) { + if (!ring->sched.ready) { dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name); return -EINVAL; } - if (vm && !job->vmid && !ring->is_mes_queue) { + if (vm && !job->vmid) { dev_err(adev->dev, "VM IB without ID\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 4c4e087230ac..5dd78a9cb12d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -576,8 +576,16 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) INIT_LIST_HEAD(&id_mgr->ids_lru); id_mgr->reserved_use_count = 0; - /* manage only VMIDs not used by KFD */ - id_mgr->num_ids = adev->vm_manager.first_kfd_vmid; + /* for GC <10, SDMA uses MMHUB so use first_kfd_vmid for both GC and MM */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) + /* manage only VMIDs not used by KFD */ + id_mgr->num_ids = adev->vm_manager.first_kfd_vmid; + else if (AMDGPU_IS_MMHUB0(i) || + AMDGPU_IS_MMHUB1(i)) + id_mgr->num_ids = 16; + else + /* manage only VMIDs not used by KFD */ + id_mgr->num_ids = adev->vm_manager.first_kfd_vmid; /* skip over VMID 0, since it is the system VM */ for (j = 1; j < id_mgr->num_ids; ++j) { @@ -588,7 
+596,7 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) } /* alloc a default reserved vmid to enforce isolation */ for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { - if (adev->enforce_isolation[i]) + if (adev->enforce_isolation[i] != AMDGPU_ENFORCE_ISOLATION_DISABLE) amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 901f8b12c672..30f16968b578 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_ih.h" +#include "amdgpu_reset.h" /** * amdgpu_ih_ring_init - initialize the IH state @@ -227,13 +228,23 @@ restart_ih: ih->rptr &= ih->ptr_mask; } - amdgpu_ih_set_rptr(adev, ih); + if (!ih->overflow) + amdgpu_ih_set_rptr(adev, ih); + wake_up_all(&ih->wait_process); /* make sure wptr hasn't changed while processing */ wptr = amdgpu_ih_get_wptr(adev, ih); if (wptr != ih->rptr) - goto restart_ih; + if (!ih->overflow) + goto restart_ih; + + if (ih->overflow) + if (amdgpu_sriov_runtime(adev)) + WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->virt.flr_work), + "Failed to queue work! at %s", + __func__); return IRQ_HANDLED; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index b0a88f92cd82..7f7ea046e209 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -72,6 +72,7 @@ struct amdgpu_ih_ring { /* For waiting on IH processing at checkpoint. */ wait_queue_head_t wait_process; uint64_t processed_timestamp; + bool overflow; }; /* return true if time stamp t2 is after t1 with 48bit wrap around */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index ce6b9ba967ff..f2c049129661 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -78,6 +78,7 @@ struct amdgpu_job { /* enforce isolation */ bool enforce_isolation; + bool run_cleaner_shader; uint32_t num_ibs; struct amdgpu_ib ibs[]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 27bfe9c8af06..8d4a2aed7231 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -45,6 +45,7 @@ #include "amdgpu_ras.h" #include "amdgpu_reset.h" #include "amd_pcie.h" +#include "amdgpu_userq.h" void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) { @@ -370,6 +371,26 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, return 0; } +static int amdgpu_userq_metadata_info_gfx(struct amdgpu_device *adev, + struct drm_amdgpu_info *info, + struct drm_amdgpu_info_uq_metadata_gfx *meta) +{ + int ret = -EOPNOTSUPP; + + if (adev->gfx.funcs->get_gfx_shadow_info) { + struct amdgpu_gfx_shadow_info shadow = {}; + + adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow, true); + meta->shadow_size = shadow.shadow_size; + meta->shadow_alignment = shadow.shadow_alignment; + meta->csa_size = shadow.csa_size; + meta->csa_alignment = shadow.csa_alignment; + ret = 0; + } + + return ret; +} + static int amdgpu_hw_ip_info(struct amdgpu_device *adev, struct drm_amdgpu_info *info, struct drm_amdgpu_info_hw_ip *result) @@ -387,7 +408,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_GFX: type = AMD_IP_BLOCK_TYPE_GFX; for (i = 0; i < adev->gfx.num_gfx_rings; i++) - if (adev->gfx.gfx_ring[i].sched.ready) + if 
(adev->gfx.gfx_ring[i].sched.ready && + !adev->gfx.gfx_ring[i].no_user_submission) ++num_rings; ib_start_alignment = 32; ib_size_alignment = 32; @@ -395,7 +417,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_COMPUTE: type = AMD_IP_BLOCK_TYPE_GFX; for (i = 0; i < adev->gfx.num_compute_rings; i++) - if (adev->gfx.compute_ring[i].sched.ready) + if (adev->gfx.compute_ring[i].sched.ready && + !adev->gfx.compute_ring[i].no_user_submission) ++num_rings; ib_start_alignment = 32; ib_size_alignment = 32; @@ -403,7 +426,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_DMA: type = AMD_IP_BLOCK_TYPE_SDMA; for (i = 0; i < adev->sdma.num_instances; i++) - if (adev->sdma.instance[i].ring.sched.ready) + if (adev->sdma.instance[i].ring.sched.ready && + !adev->sdma.instance[i].ring.no_user_submission) ++num_rings; ib_start_alignment = 256; ib_size_alignment = 4; @@ -414,7 +438,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->uvd.harvest_config & (1 << i)) continue; - if (adev->uvd.inst[i].ring.sched.ready) + if (adev->uvd.inst[i].ring.sched.ready && + !adev->uvd.inst[i].ring.no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -423,7 +448,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_VCE: type = AMD_IP_BLOCK_TYPE_VCE; for (i = 0; i < adev->vce.num_rings; i++) - if (adev->vce.ring[i].sched.ready) + if (adev->vce.ring[i].sched.ready && + !adev->vce.ring[i].no_user_submission) ++num_rings; ib_start_alignment = 256; ib_size_alignment = 4; @@ -435,7 +461,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, continue; for (j = 0; j < adev->uvd.num_enc_rings; j++) - if (adev->uvd.inst[i].ring_enc[j].sched.ready) + if (adev->uvd.inst[i].ring_enc[j].sched.ready && + !adev->uvd.inst[i].ring_enc[j].no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -447,7 +474,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->vcn.harvest_config & (1 << i)) continue; - if (adev->vcn.inst[i].ring_dec.sched.ready) + if (adev->vcn.inst[i].ring_dec.sched.ready && + !adev->vcn.inst[i].ring_dec.no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -460,7 +488,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, continue; for (j = 0; j < adev->vcn.inst[i].num_enc_rings; j++) - if (adev->vcn.inst[i].ring_enc[j].sched.ready) + if (adev->vcn.inst[i].ring_enc[j].sched.ready && + !adev->vcn.inst[i].ring_enc[j].no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -475,7 +504,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, continue; for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) - if (adev->jpeg.inst[i].ring_dec[j].sched.ready) + if (adev->jpeg.inst[i].ring_dec[j].sched.ready && + !adev->jpeg.inst[i].ring_dec[j].no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -483,7 +513,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, break; case AMDGPU_HW_IP_VPE: type = AMD_IP_BLOCK_TYPE_VPE; - if (adev->vpe.ring.sched.ready) + if (adev->vpe.ring.sched.ready && + !adev->vpe.ring.no_user_submission) ++num_rings; ib_start_alignment = 256; ib_size_alignment = 4; @@ -978,6 +1009,8 @@ out: } } + dev_info->userq_ip_mask = amdgpu_userq_get_supported_ip_mask(adev); + ret = copy_to_user(out, dev_info, min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0; kfree(dev_info); @@ -1293,6 +1326,22 @@ out: return copy_to_user(out, &gpuvm_fault, min((size_t)size, sizeof(gpuvm_fault))) ? 
-EFAULT : 0; } + case AMDGPU_INFO_UQ_FW_AREAS: { + struct drm_amdgpu_info_uq_metadata meta_info = {}; + + switch (info->query_hw_ip.type) { + case AMDGPU_HW_IP_GFX: + ret = amdgpu_userq_metadata_info_gfx(adev, info, &meta_info.gfx); + if (ret) + return ret; + + ret = copy_to_user(out, &meta_info, + min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0; + return 0; + default: + return -EINVAL; + } + } default: DRM_DEBUG_KMS("Invalid request %d\n", info->query); return -EINVAL; @@ -1376,8 +1425,16 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) mutex_init(&fpriv->bo_list_lock); idr_init_base(&fpriv->bo_list_handles, 1); + r = amdgpu_eviction_fence_init(&fpriv->evf_mgr); + if (r) + goto error_vm; + amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev); + r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, file_priv, adev); + if (r) + DRM_WARN("Can't setup usermode queues, use legacy workload submission only\n"); + file_priv->driver_priv = fpriv; goto out_suspend; @@ -1445,6 +1502,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_bo_unreserve(pd); } + fpriv->evf_mgr.fd_closing = true; + amdgpu_userq_mgr_fini(&fpriv->userq_mgr); + amdgpu_eviction_fence_destroy(&fpriv->evf_mgr); + amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); amdgpu_vm_fini(adev, &fpriv->vm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index fb212f0a1136..2febb63ab232 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -39,42 +39,6 @@ int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) PAGE_SIZE); } -static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev, - int ip_type, uint64_t *doorbell_index) -{ - unsigned int offset, found; - struct amdgpu_mes *mes = &adev->mes; - - if (ip_type == AMDGPU_RING_TYPE_SDMA) - offset = adev->doorbell_index.sdma_engine[0]; - else - offset = 0; - - found = find_next_zero_bit(mes->doorbell_bitmap, mes->num_mes_dbs, offset); - if (found >= mes->num_mes_dbs) { - DRM_WARN("No doorbell available\n"); - return -ENOSPC; - } - - set_bit(found, mes->doorbell_bitmap); - - /* Get the absolute doorbell index on BAR */ - *doorbell_index = mes->db_start_dw_offset + found * 2; - return 0; -} - -static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev, - uint32_t doorbell_index) -{ - unsigned int old, rel_index; - struct amdgpu_mes *mes = &adev->mes; - - /* Find the relative index of the doorbell in this object */ - rel_index = (doorbell_index - mes->db_start_dw_offset) / 2; - old = test_and_clear_bit(rel_index, mes->doorbell_bitmap); - WARN_ON(!old); -} - static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) { int i; @@ -126,7 +90,7 @@ static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev) int amdgpu_mes_init(struct amdgpu_device *adev) { - int i, r; + int i, r, num_pipes; adev->mes.adev = adev; @@ -142,19 +106,52 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; adev->mes.vmid_mask_mmhub = 0xffffff00; - adev->mes.vmid_mask_gfxhub = 0xffffff00; + adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xfffffffe : 0xffffff00; + + num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me; + if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES) + dev_warn(adev->dev, "more gfx pipes than supported by MES! 
(%d vs %d)\n", + num_pipes, AMDGPU_MES_MAX_GFX_PIPES); + + for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) { + if (i >= num_pipes) + break; + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= + IP_VERSION(12, 0, 0)) + /* + * GFX V12 has only one GFX pipe, but 8 queues in it. + * GFX pipe 0 queue 0 is being used by Kernel queue. + * Set GFX pipe 0 queue 1-7 for MES scheduling + * mask = 1111 1110b + */ + adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0xFF : 0xFE; + else + /* + * GFX pipe 0 queue 0 is being used by Kernel queue. + * Set GFX pipe 0 queue 1 for MES scheduling + * mask = 10b + */ + adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0x3 : 0x2; + } + + num_pipes = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec; + if (num_pipes > AMDGPU_MES_MAX_COMPUTE_PIPES) + dev_warn(adev->dev, "more compute pipes than supported by MES! (%d vs %d)\n", + num_pipes, AMDGPU_MES_MAX_COMPUTE_PIPES); for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) { - if (i >= (adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec)) + if (i >= num_pipes) break; - adev->mes.compute_hqd_mask[i] = 0xc; + adev->mes.compute_hqd_mask[i] = adev->gfx.disable_kq ? 0xF : 0xC; } - for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) - adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe; + num_pipes = adev->sdma.num_instances; + if (num_pipes > AMDGPU_MES_MAX_SDMA_PIPES) + dev_warn(adev->dev, "more SDMA pipes than supported by MES! (%d vs %d)\n", + num_pipes, AMDGPU_MES_MAX_SDMA_PIPES); for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) { - if (i >= adev->sdma.num_instances) + if (i >= num_pipes) break; adev->mes.sdma_hqd_mask[i] = 0xfc; } @@ -240,244 +237,6 @@ void amdgpu_mes_fini(struct amdgpu_device *adev) mutex_destroy(&adev->mes.mutex_hidden); } -static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q) -{ - amdgpu_bo_free_kernel(&q->mqd_obj, - &q->mqd_gpu_addr, - &q->mqd_cpu_ptr); -} - -int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, - struct amdgpu_vm *vm) -{ - struct amdgpu_mes_process *process; - int r; - - /* allocate the mes process buffer */ - process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL); - if (!process) { - DRM_ERROR("no more memory to create mes process\n"); - return -ENOMEM; - } - - /* allocate the process context bo and map it */ - r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, - &process->proc_ctx_bo, - &process->proc_ctx_gpu_addr, - &process->proc_ctx_cpu_ptr); - if (r) { - DRM_ERROR("failed to allocate process context bo\n"); - goto clean_up_memory; - } - memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. 
- */ - amdgpu_mes_lock(&adev->mes); - - /* add the mes process to idr list */ - r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1, - GFP_KERNEL); - if (r < 0) { - DRM_ERROR("failed to lock pasid=%d\n", pasid); - goto clean_up_ctx; - } - - INIT_LIST_HEAD(&process->gang_list); - process->vm = vm; - process->pasid = pasid; - process->process_quantum = adev->mes.default_process_quantum; - process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo); - - amdgpu_mes_unlock(&adev->mes); - return 0; - -clean_up_ctx: - amdgpu_mes_unlock(&adev->mes); - amdgpu_bo_free_kernel(&process->proc_ctx_bo, - &process->proc_ctx_gpu_addr, - &process->proc_ctx_cpu_ptr); -clean_up_memory: - kfree(process); - return r; -} - -void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid) -{ - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang, *tmp1; - struct amdgpu_mes_queue *queue, *tmp2; - struct mes_remove_queue_input queue_input; - unsigned long flags; - int r; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - process = idr_find(&adev->mes.pasid_idr, pasid); - if (!process) { - DRM_WARN("pasid %d doesn't exist\n", pasid); - amdgpu_mes_unlock(&adev->mes); - return; - } - - /* Remove all queues from hardware */ - list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) { - list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) { - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - idr_remove(&adev->mes.queue_id_idr, queue->queue_id); - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - - queue_input.doorbell_offset = queue->doorbell_off; - queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; - - r = adev->mes.funcs->remove_hw_queue(&adev->mes, - &queue_input); - if (r) - DRM_WARN("failed to remove hardware queue\n"); - } - - idr_remove(&adev->mes.gang_id_idr, gang->gang_id); - } - - idr_remove(&adev->mes.pasid_idr, pasid); - amdgpu_mes_unlock(&adev->mes); - - /* free all memory allocated by the process */ - list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) { - /* free all queues in the gang */ - list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) { - amdgpu_mes_queue_free_mqd(queue); - list_del(&queue->list); - kfree(queue); - } - amdgpu_bo_free_kernel(&gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); - list_del(&gang->list); - kfree(gang); - - } - amdgpu_bo_free_kernel(&process->proc_ctx_bo, - &process->proc_ctx_gpu_addr, - &process->proc_ctx_cpu_ptr); - kfree(process); -} - -int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid, - struct amdgpu_mes_gang_properties *gprops, - int *gang_id) -{ - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang; - int r; - - /* allocate the mes gang buffer */ - gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL); - if (!gang) { - return -ENOMEM; - } - - /* allocate the gang context bo and map it to cpu space */ - r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, - &gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); - if (r) { - DRM_ERROR("failed to allocate process context bo\n"); - goto clean_up_mem; - } - memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE); - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. 
- */ - amdgpu_mes_lock(&adev->mes); - - process = idr_find(&adev->mes.pasid_idr, pasid); - if (!process) { - DRM_ERROR("pasid %d doesn't exist\n", pasid); - r = -EINVAL; - goto clean_up_ctx; - } - - /* add the mes gang to idr list */ - r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0, - GFP_KERNEL); - if (r < 0) { - DRM_ERROR("failed to allocate idr for gang\n"); - goto clean_up_ctx; - } - - gang->gang_id = r; - *gang_id = r; - - INIT_LIST_HEAD(&gang->queue_list); - gang->process = process; - gang->priority = gprops->priority; - gang->gang_quantum = gprops->gang_quantum ? - gprops->gang_quantum : adev->mes.default_gang_quantum; - gang->global_priority_level = gprops->global_priority_level; - gang->inprocess_gang_priority = gprops->inprocess_gang_priority; - list_add_tail(&gang->list, &process->gang_list); - - amdgpu_mes_unlock(&adev->mes); - return 0; - -clean_up_ctx: - amdgpu_mes_unlock(&adev->mes); - amdgpu_bo_free_kernel(&gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); -clean_up_mem: - kfree(gang); - return r; -} - -int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id) -{ - struct amdgpu_mes_gang *gang; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - gang = idr_find(&adev->mes.gang_id_idr, gang_id); - if (!gang) { - DRM_ERROR("gang id %d doesn't exist\n", gang_id); - amdgpu_mes_unlock(&adev->mes); - return -EINVAL; - } - - if (!list_empty(&gang->queue_list)) { - DRM_ERROR("queue list is not empty\n"); - amdgpu_mes_unlock(&adev->mes); - return -EBUSY; - } - - idr_remove(&adev->mes.gang_id_idr, gang->gang_id); - list_del(&gang->list); - amdgpu_mes_unlock(&adev->mes); - - amdgpu_bo_free_kernel(&gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); - - kfree(gang); - - return 0; -} - int amdgpu_mes_suspend(struct amdgpu_device *adev) { struct mes_suspend_gang_input input; @@ -526,304 +285,6 @@ int amdgpu_mes_resume(struct amdgpu_device *adev) return r; } -static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev, - struct amdgpu_mes_queue *q, - struct amdgpu_mes_queue_properties *p) -{ - struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type]; - u32 mqd_size = mqd_mgr->mqd_size; - int r; - - r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, - &q->mqd_obj, - &q->mqd_gpu_addr, &q->mqd_cpu_ptr); - if (r) { - dev_warn(adev->dev, "failed to create queue mqd bo (%d)", r); - return r; - } - memset(q->mqd_cpu_ptr, 0, mqd_size); - - r = amdgpu_bo_reserve(q->mqd_obj, false); - if (unlikely(r != 0)) - goto clean_up; - - return 0; - -clean_up: - amdgpu_bo_free_kernel(&q->mqd_obj, - &q->mqd_gpu_addr, - &q->mqd_cpu_ptr); - return r; -} - -static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, - struct amdgpu_mes_queue *q, - struct amdgpu_mes_queue_properties *p) -{ - struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type]; - struct amdgpu_mqd_prop mqd_prop = {0}; - - mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr; - mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr; - mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr; - mqd_prop.wptr_gpu_addr = p->wptr_gpu_addr; - mqd_prop.queue_size = p->queue_size; - mqd_prop.use_doorbell = true; - mqd_prop.doorbell_index = p->doorbell_off; - mqd_prop.eop_gpu_addr = p->eop_gpu_addr; - mqd_prop.hqd_pipe_priority = p->hqd_pipe_priority; - mqd_prop.hqd_queue_priority = p->hqd_queue_priority; - mqd_prop.hqd_active = false; - - if (p->queue_type == AMDGPU_RING_TYPE_GFX || - p->queue_type == 
AMDGPU_RING_TYPE_COMPUTE) { - mutex_lock(&adev->srbm_mutex); - amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0); - } - - mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop); - - if (p->queue_type == AMDGPU_RING_TYPE_GFX || - p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { - amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - } - - amdgpu_bo_unreserve(q->mqd_obj); -} - -int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, - struct amdgpu_mes_queue_properties *qprops, - int *queue_id) -{ - struct amdgpu_mes_queue *queue; - struct amdgpu_mes_gang *gang; - struct mes_add_queue_input queue_input; - unsigned long flags; - int r; - - memset(&queue_input, 0, sizeof(struct mes_add_queue_input)); - - /* allocate the mes queue buffer */ - queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL); - if (!queue) { - DRM_ERROR("Failed to allocate memory for queue\n"); - return -ENOMEM; - } - - /* Allocate the queue mqd */ - r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops); - if (r) - goto clean_up_memory; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - gang = idr_find(&adev->mes.gang_id_idr, gang_id); - if (!gang) { - DRM_ERROR("gang id %d doesn't exist\n", gang_id); - r = -EINVAL; - goto clean_up_mqd; - } - - /* add the mes gang to idr list */ - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0, - GFP_ATOMIC); - if (r < 0) { - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - goto clean_up_mqd; - } - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - *queue_id = queue->queue_id = r; - - /* allocate a doorbell index for the queue */ - r = amdgpu_mes_kernel_doorbell_get(adev, - qprops->queue_type, - &qprops->doorbell_off); - if (r) - goto clean_up_queue_id; - - /* initialize the queue mqd */ - amdgpu_mes_queue_init_mqd(adev, queue, qprops); - - /* add hw queue to mes */ - queue_input.process_id = gang->process->pasid; - - queue_input.page_table_base_addr = - adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr - - adev->gmc.vram_start; - - queue_input.process_va_start = 0; - queue_input.process_va_end = - (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT; - queue_input.process_quantum = gang->process->process_quantum; - queue_input.process_context_addr = gang->process->proc_ctx_gpu_addr; - queue_input.gang_quantum = gang->gang_quantum; - queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; - queue_input.inprocess_gang_priority = gang->inprocess_gang_priority; - queue_input.gang_global_priority_level = gang->global_priority_level; - queue_input.doorbell_offset = qprops->doorbell_off; - queue_input.mqd_addr = queue->mqd_gpu_addr; - queue_input.wptr_addr = qprops->wptr_gpu_addr; - queue_input.wptr_mc_addr = qprops->wptr_mc_addr; - queue_input.queue_type = qprops->queue_type; - queue_input.paging = qprops->paging; - queue_input.is_kfd_process = 0; - - r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); - if (r) { - DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n", - qprops->doorbell_off); - goto clean_up_doorbell; - } - - DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, " - "queue type=%d, doorbell=0x%llx\n", - gang->process->pasid, gang_id, qprops->queue_type, - qprops->doorbell_off); - - queue->ring = qprops->ring; - queue->doorbell_off = qprops->doorbell_off; - queue->wptr_gpu_addr = qprops->wptr_gpu_addr; - 
queue->queue_type = qprops->queue_type; - queue->paging = qprops->paging; - queue->gang = gang; - queue->ring->mqd_ptr = queue->mqd_cpu_ptr; - list_add_tail(&queue->list, &gang->queue_list); - - amdgpu_mes_unlock(&adev->mes); - return 0; - -clean_up_doorbell: - amdgpu_mes_kernel_doorbell_free(adev, qprops->doorbell_off); -clean_up_queue_id: - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - idr_remove(&adev->mes.queue_id_idr, queue->queue_id); - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); -clean_up_mqd: - amdgpu_mes_unlock(&adev->mes); - amdgpu_mes_queue_free_mqd(queue); -clean_up_memory: - kfree(queue); - return r; -} - -int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) -{ - unsigned long flags; - struct amdgpu_mes_queue *queue; - struct amdgpu_mes_gang *gang; - struct mes_remove_queue_input queue_input; - int r; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - /* remove the mes gang from idr list */ - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - - queue = idr_find(&adev->mes.queue_id_idr, queue_id); - if (!queue) { - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - amdgpu_mes_unlock(&adev->mes); - DRM_ERROR("queue id %d doesn't exist\n", queue_id); - return -EINVAL; - } - - idr_remove(&adev->mes.queue_id_idr, queue_id); - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - - DRM_DEBUG("try to remove queue, doorbell off = 0x%llx\n", - queue->doorbell_off); - - gang = queue->gang; - queue_input.doorbell_offset = queue->doorbell_off; - queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; - - r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); - if (r) - DRM_ERROR("failed to remove hardware queue, queue id = %d\n", - queue_id); - - list_del(&queue->list); - amdgpu_mes_kernel_doorbell_free(adev, queue->doorbell_off); - amdgpu_mes_unlock(&adev->mes); - - amdgpu_mes_queue_free_mqd(queue); - kfree(queue); - return 0; -} - -int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id) -{ - unsigned long flags; - struct amdgpu_mes_queue *queue; - struct amdgpu_mes_gang *gang; - struct mes_reset_queue_input queue_input; - int r; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. 
- */ - amdgpu_mes_lock(&adev->mes); - - /* remove the mes gang from idr list */ - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - - queue = idr_find(&adev->mes.queue_id_idr, queue_id); - if (!queue) { - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - amdgpu_mes_unlock(&adev->mes); - DRM_ERROR("queue id %d doesn't exist\n", queue_id); - return -EINVAL; - } - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - - DRM_DEBUG("try to reset queue, doorbell off = 0x%llx\n", - queue->doorbell_off); - - gang = queue->gang; - queue_input.doorbell_offset = queue->doorbell_off; - queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; - - r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); - if (r) - DRM_ERROR("failed to reset hardware queue, queue id = %d\n", - queue_id); - - amdgpu_mes_unlock(&adev->mes); - - return 0; -} - -int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type, - int me_id, int pipe_id, int queue_id, int vmid) -{ - struct mes_reset_queue_input queue_input; - int r; - - queue_input.queue_type = queue_type; - queue_input.use_mmio = true; - queue_input.me_id = me_id; - queue_input.pipe_id = pipe_id; - queue_input.queue_id = queue_id; - queue_input.vmid = vmid; - r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); - if (r) - DRM_ERROR("failed to reset hardware queue by mmio, queue id = %d\n", - queue_id); - return r; -} - int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -874,7 +335,7 @@ int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, unsigned int vmid, bool use_mmio) { - struct mes_reset_legacy_queue_input queue_input; + struct mes_reset_queue_input queue_input; int r; memset(&queue_input, 0, sizeof(queue_input)); @@ -888,8 +349,11 @@ int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, queue_input.wptr_addr = ring->wptr_gpu_addr; queue_input.vmid = vmid; queue_input.use_mmio = use_mmio; + queue_input.is_kq = true; + if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) + queue_input.legacy_gfx = true; - r = adev->mes.funcs->reset_legacy_queue(&adev->mes, &queue_input); + r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); if (r) DRM_ERROR("failed to reset legacy queue\n"); @@ -905,7 +369,7 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) uint32_t *read_val_ptr; if (amdgpu_device_wb_get(adev, &addr_offset)) { - DRM_ERROR("critical bug! too many mes readers\n"); + dev_err(adev->dev, "critical bug! 
too many mes readers\n"); goto error; } read_val_gpu_addr = adev->wb.gpu_addr + (addr_offset * 4); @@ -915,13 +379,13 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) op_input.read_reg.buffer_addr = read_val_gpu_addr; if (!adev->mes.funcs->misc_op) { - DRM_ERROR("mes rreg is not supported!\n"); + dev_err(adev->dev, "mes rreg is not supported!\n"); goto error; } r = adev->mes.funcs->misc_op(&adev->mes, &op_input); if (r) - DRM_ERROR("failed to read reg (0x%x)\n", reg); + dev_err(adev->dev, "failed to read reg (0x%x)\n", reg); else val = *(read_val_ptr); @@ -942,14 +406,14 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev, op_input.write_reg.reg_value = val; if (!adev->mes.funcs->misc_op) { - DRM_ERROR("mes wreg is not supported!\n"); + dev_err(adev->dev, "mes wreg is not supported!\n"); r = -EINVAL; goto error; } r = adev->mes.funcs->misc_op(&adev->mes, &op_input); if (r) - DRM_ERROR("failed to write reg (0x%x)\n", reg); + dev_err(adev->dev, "failed to write reg (0x%x)\n", reg); error: return r; @@ -969,14 +433,14 @@ int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, op_input.wrm_reg.mask = mask; if (!adev->mes.funcs->misc_op) { - DRM_ERROR("mes reg_write_reg_wait is not supported!\n"); + dev_err(adev->dev, "mes reg_write_reg_wait is not supported!\n"); r = -EINVAL; goto error; } r = adev->mes.funcs->misc_op(&adev->mes, &op_input); if (r) - DRM_ERROR("failed to reg_write_reg_wait\n"); + dev_err(adev->dev, "failed to reg_write_reg_wait\n"); error: return r; @@ -994,14 +458,14 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg, op_input.wrm_reg.mask = mask; if (!adev->mes.funcs->misc_op) { - DRM_ERROR("mes reg wait is not supported!\n"); + dev_err(adev->dev, "mes reg wait is not supported!\n"); r = -EINVAL; goto error; } r = adev->mes.funcs->misc_op(&adev->mes, &op_input); if (r) - DRM_ERROR("failed to reg_write_reg_wait\n"); + dev_err(adev->dev, "failed to reg_write_reg_wait\n"); error: return r; @@ -1075,25 +539,6 @@ int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, return r; } -static void -amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_mes_queue_properties *props) -{ - props->queue_type = ring->funcs->type; - props->hqd_base_gpu_addr = ring->gpu_addr; - props->rptr_gpu_addr = ring->rptr_gpu_addr; - props->wptr_gpu_addr = ring->wptr_gpu_addr; - props->wptr_mc_addr = - ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs; - props->queue_size = ring->ring_size; - props->eop_gpu_addr = ring->eop_gpu_addr; - props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; - props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM; - props->paging = false; - props->ring = ring; -} - #define DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(_eng) \ do { \ if (id_offs < AMDGPU_MES_CTX_MAX_OFFS) \ @@ -1130,453 +575,12 @@ int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs) return -EINVAL; } -int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, - int queue_type, int idx, - struct amdgpu_mes_ctx_data *ctx_data, - struct amdgpu_ring **out) -{ - struct amdgpu_ring *ring; - struct amdgpu_mes_gang *gang; - struct amdgpu_mes_queue_properties qprops = {0}; - int r, queue_id, pasid; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. 
- */ - amdgpu_mes_lock(&adev->mes); - gang = idr_find(&adev->mes.gang_id_idr, gang_id); - if (!gang) { - DRM_ERROR("gang id %d doesn't exist\n", gang_id); - amdgpu_mes_unlock(&adev->mes); - return -EINVAL; - } - pasid = gang->process->pasid; - - ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL); - if (!ring) { - amdgpu_mes_unlock(&adev->mes); - return -ENOMEM; - } - - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->is_mes_queue = true; - ring->mes_ctx = ctx_data; - ring->idx = idx; - ring->no_scheduler = true; - - if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { - int offset = offsetof(struct amdgpu_mes_ctx_meta_data, - compute[ring->idx].mec_hpd); - ring->eop_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } - - switch (queue_type) { - case AMDGPU_RING_TYPE_GFX: - ring->funcs = adev->gfx.gfx_ring[0].funcs; - ring->me = adev->gfx.gfx_ring[0].me; - ring->pipe = adev->gfx.gfx_ring[0].pipe; - break; - case AMDGPU_RING_TYPE_COMPUTE: - ring->funcs = adev->gfx.compute_ring[0].funcs; - ring->me = adev->gfx.compute_ring[0].me; - ring->pipe = adev->gfx.compute_ring[0].pipe; - break; - case AMDGPU_RING_TYPE_SDMA: - ring->funcs = adev->sdma.instance[0].ring.funcs; - break; - default: - BUG(); - } - - r = amdgpu_ring_init(adev, ring, 1024, NULL, 0, - AMDGPU_RING_PRIO_DEFAULT, NULL); - if (r) { - amdgpu_mes_unlock(&adev->mes); - goto clean_up_memory; - } - - amdgpu_mes_ring_to_queue_props(adev, ring, &qprops); - - dma_fence_wait(gang->process->vm->last_update, false); - dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false); - amdgpu_mes_unlock(&adev->mes); - - r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id); - if (r) - goto clean_up_ring; - - ring->hw_queue_id = queue_id; - ring->doorbell_index = qprops.doorbell_off; - - if (queue_type == AMDGPU_RING_TYPE_GFX) - sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id); - else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) - sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id, - queue_id); - else if (queue_type == AMDGPU_RING_TYPE_SDMA) - sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id, - queue_id); - else - BUG(); - - *out = ring; - return 0; - -clean_up_ring: - amdgpu_ring_fini(ring); -clean_up_memory: - kfree(ring); - return r; -} - -void amdgpu_mes_remove_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring) - return; - - amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id); - timer_delete_sync(&ring->fence_drv.fallback_timer); - amdgpu_ring_fini(ring); - kfree(ring); -} - uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, enum amdgpu_mes_priority_level prio) { return adev->mes.aggregated_doorbells[prio]; } -int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data) -{ - int r; - - r = amdgpu_bo_create_kernel(adev, - sizeof(struct amdgpu_mes_ctx_meta_data), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &ctx_data->meta_data_obj, - &ctx_data->meta_data_mc_addr, - &ctx_data->meta_data_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create CTX bo failed\n", r); - return r; - } - - if (!ctx_data->meta_data_obj) - return -ENOMEM; - - memset(ctx_data->meta_data_ptr, 0, - sizeof(struct amdgpu_mes_ctx_meta_data)); - - return 0; -} - -void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data) -{ - if (ctx_data->meta_data_obj) - amdgpu_bo_free_kernel(&ctx_data->meta_data_obj, - &ctx_data->meta_data_mc_addr, - &ctx_data->meta_data_ptr); -} - -int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, - 
struct amdgpu_vm *vm, - struct amdgpu_mes_ctx_data *ctx_data) -{ - struct amdgpu_bo_va *bo_va; - struct amdgpu_sync sync; - struct drm_exec exec; - int r; - - amdgpu_sync_create(&sync); - - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - r = drm_exec_lock_obj(&exec, - &ctx_data->meta_data_obj->tbo.base); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto error_fini_exec; - - r = amdgpu_vm_lock_pd(vm, &exec, 0); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto error_fini_exec; - } - - bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj); - if (!bo_va) { - DRM_ERROR("failed to create bo_va for meta data BO\n"); - r = -ENOMEM; - goto error_fini_exec; - } - - r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0, - sizeof(struct amdgpu_mes_ctx_meta_data), - AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | - AMDGPU_PTE_EXECUTABLE); - - if (r) { - DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r); - goto error_del_bo_va; - } - - r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) { - DRM_ERROR("failed to do vm_bo_update on meta data\n"); - goto error_del_bo_va; - } - amdgpu_sync_fence(&sync, bo_va->last_pt_update, GFP_KERNEL); - - r = amdgpu_vm_update_pdes(adev, vm, false); - if (r) { - DRM_ERROR("failed to update pdes on meta data\n"); - goto error_del_bo_va; - } - amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL); - - amdgpu_sync_wait(&sync, false); - drm_exec_fini(&exec); - - amdgpu_sync_free(&sync); - ctx_data->meta_data_va = bo_va; - return 0; - -error_del_bo_va: - amdgpu_vm_bo_del(adev, bo_va); - -error_fini_exec: - drm_exec_fini(&exec); - amdgpu_sync_free(&sync); - return r; -} - -int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data) -{ - struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va; - struct amdgpu_bo *bo = ctx_data->meta_data_obj; - struct amdgpu_vm *vm = bo_va->base.vm; - struct dma_fence *fence; - struct drm_exec exec; - long r; - - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - r = drm_exec_lock_obj(&exec, - &ctx_data->meta_data_obj->tbo.base); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto out_unlock; - - r = amdgpu_vm_lock_pd(vm, &exec, 0); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto out_unlock; - } - - amdgpu_vm_bo_del(adev, bo_va); - if (!amdgpu_vm_ready(vm)) - goto out_unlock; - - r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, - &fence); - if (r) - goto out_unlock; - if (fence) { - amdgpu_bo_fence(bo, fence, true); - fence = NULL; - } - - r = amdgpu_vm_clear_freed(adev, vm, &fence); - if (r || !fence) - goto out_unlock; - - dma_fence_wait(fence, false); - amdgpu_bo_fence(bo, fence, true); - dma_fence_put(fence); - -out_unlock: - if (unlikely(r < 0)) - dev_err(adev->dev, "failed to clear page tables (%ld)\n", r); - drm_exec_fini(&exec); - - return r; -} - -static int amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device *adev, - int pasid, int *gang_id, - int queue_type, int num_queue, - struct amdgpu_ring **added_rings, - struct amdgpu_mes_ctx_data *ctx_data) -{ - struct amdgpu_ring *ring; - struct amdgpu_mes_gang_properties gprops = {0}; - int r, j; - - /* create a gang for the process */ - gprops.priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; - gprops.gang_quantum = adev->mes.default_gang_quantum; - gprops.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; - gprops.priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; - gprops.global_priority_level = 
AMDGPU_MES_PRIORITY_LEVEL_NORMAL; - - r = amdgpu_mes_add_gang(adev, pasid, &gprops, gang_id); - if (r) { - DRM_ERROR("failed to add gang\n"); - return r; - } - - /* create queues for the gang */ - for (j = 0; j < num_queue; j++) { - r = amdgpu_mes_add_ring(adev, *gang_id, queue_type, j, - ctx_data, &ring); - if (r) { - DRM_ERROR("failed to add ring\n"); - break; - } - - DRM_INFO("ring %s was added\n", ring->name); - added_rings[j] = ring; - } - - return 0; -} - -static int amdgpu_mes_test_queues(struct amdgpu_ring **added_rings) -{ - struct amdgpu_ring *ring; - int i, r; - - for (i = 0; i < AMDGPU_MES_CTX_MAX_RINGS; i++) { - ring = added_rings[i]; - if (!ring) - continue; - - r = amdgpu_ring_test_helper(ring); - if (r) - return r; - - r = amdgpu_ring_test_ib(ring, 1000 * 10); - if (r) { - DRM_DEV_ERROR(ring->adev->dev, - "ring %s ib test failed (%d)\n", - ring->name, r); - return r; - } else - DRM_INFO("ring %s ib test pass\n", ring->name); - } - - return 0; -} - -int amdgpu_mes_self_test(struct amdgpu_device *adev) -{ - struct amdgpu_vm *vm = NULL; - struct amdgpu_mes_ctx_data ctx_data = {0}; - struct amdgpu_ring *added_rings[AMDGPU_MES_CTX_MAX_RINGS] = { NULL }; - int gang_ids[3] = {0}; - int queue_types[][2] = { { AMDGPU_RING_TYPE_GFX, 1 }, - { AMDGPU_RING_TYPE_COMPUTE, 1 }, - { AMDGPU_RING_TYPE_SDMA, 1} }; - int i, r, pasid, k = 0; - - pasid = amdgpu_pasid_alloc(16); - if (pasid < 0) { - dev_warn(adev->dev, "No more PASIDs available!"); - pasid = 0; - } - - vm = kzalloc(sizeof(*vm), GFP_KERNEL); - if (!vm) { - r = -ENOMEM; - goto error_pasid; - } - - r = amdgpu_vm_init(adev, vm, -1); - if (r) { - DRM_ERROR("failed to initialize vm\n"); - goto error_pasid; - } - - r = amdgpu_mes_ctx_alloc_meta_data(adev, &ctx_data); - if (r) { - DRM_ERROR("failed to alloc ctx meta data\n"); - goto error_fini; - } - - ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_BOTTOM; - r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data); - if (r) { - DRM_ERROR("failed to map ctx meta data\n"); - goto error_vm; - } - - r = amdgpu_mes_create_process(adev, pasid, vm); - if (r) { - DRM_ERROR("failed to create MES process\n"); - goto error_vm; - } - - for (i = 0; i < ARRAY_SIZE(queue_types); i++) { - /* On GFX v10.3, fw hasn't supported to map sdma queue. 
*/ - if (amdgpu_ip_version(adev, GC_HWIP, 0) >= - IP_VERSION(10, 3, 0) && - amdgpu_ip_version(adev, GC_HWIP, 0) < - IP_VERSION(11, 0, 0) && - queue_types[i][0] == AMDGPU_RING_TYPE_SDMA) - continue; - - r = amdgpu_mes_test_create_gang_and_queues(adev, pasid, - &gang_ids[i], - queue_types[i][0], - queue_types[i][1], - &added_rings[k], - &ctx_data); - if (r) - goto error_queues; - - k += queue_types[i][1]; - } - - /* start ring test and ib test for MES queues */ - amdgpu_mes_test_queues(added_rings); - -error_queues: - /* remove all queues */ - for (i = 0; i < ARRAY_SIZE(added_rings); i++) { - if (!added_rings[i]) - continue; - amdgpu_mes_remove_ring(adev, added_rings[i]); - } - - for (i = 0; i < ARRAY_SIZE(gang_ids); i++) { - if (!gang_ids[i]) - continue; - amdgpu_mes_remove_gang(adev, gang_ids[i]); - } - - amdgpu_mes_destroy_process(adev, pasid); - -error_vm: - amdgpu_mes_ctx_unmap_meta_data(adev, &ctx_data); - -error_fini: - amdgpu_vm_fini(adev, vm); - -error_pasid: - if (pasid) - amdgpu_pasid_free(pasid); - - amdgpu_mes_ctx_free_meta_data(&ctx_data); - kfree(vm); - return 0; -} - int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) { const struct mes_firmware_header_v1_0 *mes_hdr; @@ -1705,7 +709,7 @@ int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev) if (adev->enable_mes && adev->gfx.enable_cleaner_shader) { mutex_lock(&adev->enforce_isolation_mutex); for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { - if (adev->enforce_isolation[i]) + if (adev->enforce_isolation[i] == AMDGPU_ENFORCE_ISOLATION_ENABLE) r |= amdgpu_mes_set_enforce_isolation(adev, i, true); else r |= amdgpu_mes_set_enforce_isolation(adev, i, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index da2c9a8cb3e0..a41f65b4f733 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -111,8 +111,8 @@ struct amdgpu_mes { uint32_t vmid_mask_gfxhub; uint32_t vmid_mask_mmhub; - uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES]; uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES]; + uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES]; uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES]; uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS]; uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES]; @@ -149,19 +149,6 @@ struct amdgpu_mes { }; -struct amdgpu_mes_process { - int pasid; - struct amdgpu_vm *vm; - uint64_t pd_gpu_addr; - struct amdgpu_bo *proc_ctx_bo; - uint64_t proc_ctx_gpu_addr; - void *proc_ctx_cpu_ptr; - uint64_t process_quantum; - struct list_head gang_list; - uint32_t doorbell_index; - struct mutex doorbell_lock; -}; - struct amdgpu_mes_gang { int gang_id; int priority; @@ -248,18 +235,6 @@ struct mes_remove_queue_input { uint64_t gang_context_addr; }; -struct mes_reset_queue_input { - uint32_t doorbell_offset; - uint64_t gang_context_addr; - bool use_mmio; - uint32_t queue_type; - uint32_t me_id; - uint32_t pipe_id; - uint32_t queue_id; - uint32_t xcc_id; - uint32_t vmid; -}; - struct mes_map_legacy_queue_input { uint32_t queue_type; uint32_t doorbell_offset; @@ -291,7 +266,7 @@ struct mes_resume_gang_input { uint64_t gang_context_addr; }; -struct mes_reset_legacy_queue_input { +struct mes_reset_queue_input { uint32_t queue_type; uint32_t doorbell_offset; bool use_mmio; @@ -301,6 +276,8 @@ struct mes_reset_legacy_queue_input { uint64_t mqd_addr; uint64_t wptr_addr; uint32_t vmid; + bool legacy_gfx; + bool is_kq; }; enum mes_misc_opcode { @@ -388,9 +365,6 @@ struct 
amdgpu_mes_funcs { int (*misc_op)(struct amdgpu_mes *mes, struct mes_misc_op_input *input); - int (*reset_legacy_queue)(struct amdgpu_mes *mes, - struct mes_reset_legacy_queue_input *input); - int (*reset_hw_queue)(struct amdgpu_mes *mes, struct mes_reset_queue_input *input); }; @@ -404,26 +378,9 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe); int amdgpu_mes_init(struct amdgpu_device *adev); void amdgpu_mes_fini(struct amdgpu_device *adev); -int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, - struct amdgpu_vm *vm); -void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid); - -int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid, - struct amdgpu_mes_gang_properties *gprops, - int *gang_id); -int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id); - int amdgpu_mes_suspend(struct amdgpu_device *adev); int amdgpu_mes_resume(struct amdgpu_device *adev); -int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, - struct amdgpu_mes_queue_properties *qprops, - int *queue_id); -int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id); -int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id); -int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type, - int me_id, int pipe_id, int queue_id, int vmid); - int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, @@ -451,27 +408,10 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, bool trap_en); int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, uint64_t process_context_addr); -int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, - int queue_type, int idx, - struct amdgpu_mes_ctx_data *ctx_data, - struct amdgpu_ring **out); -void amdgpu_mes_remove_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring); uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, enum amdgpu_mes_priority_level prio); -int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data); -void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data); -int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_mes_ctx_data *ctx_data); -int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data); - -int amdgpu_mes_self_test(struct amdgpu_device *adev); - int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev); /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 0b9987781f76..dbe57996a481 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1644,7 +1644,11 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS); amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID); amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC); - + /* Add the gem obj resv fence dump*/ + if (dma_resv_trylock(bo->tbo.base.resv)) { + dma_resv_describe(bo->tbo.base.resv, m); + dma_resv_unlock(bo->tbo.base.resv); + } seq_puts(m, "\n"); return size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index df5d5dbd7f0f..bb67d2bbd7f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2214,7 +2214,8 @@ static int 
psp_securedisplay_initialize(struct psp_context *psp) if (!psp->securedisplay_context.context.bin_desc.size_bytes || !psp->securedisplay_context.context.bin_desc.start_addr) { - dev_info(psp->adev->dev, "SECUREDISPLAY: securedisplay ta ucode is not available\n"); + dev_info(psp->adev->dev, + "SECUREDISPLAY: optional securedisplay ta ucode is not available\n"); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 8d5acc415d38..dcf5e8e0b9e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -107,6 +107,7 @@ enum psp_reg_prog_id { PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */ PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */ PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */ + PSP_REG_MMHUB_L1_TLB_CNTL = 25, PSP_REG_LAST }; @@ -142,6 +143,8 @@ struct psp_funcs { bool (*get_ras_capability)(struct psp_context *psp); bool (*is_aux_sos_load_required)(struct psp_context *psp); bool (*is_reload_needed)(struct psp_context *psp); + int (*reg_program_no_ring)(struct psp_context *psp, uint32_t val, + enum psp_reg_prog_id id); }; struct ta_funcs { @@ -475,6 +478,10 @@ struct amdgpu_psp_funcs { #define psp_is_aux_sos_load_required(psp) \ ((psp)->funcs->is_aux_sos_load_required ? (psp)->funcs->is_aux_sos_load_required((psp)) : 0) +#define psp_reg_program_no_ring(psp, val, id) \ + ((psp)->funcs->reg_program_no_ring ? \ + (psp)->funcs->reg_program_no_ring((psp), val, id) : -EINVAL) + extern const struct amd_ip_funcs psp_ip_funcs; extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; @@ -569,5 +576,8 @@ bool amdgpu_psp_get_ras_capability(struct psp_context *psp); int psp_config_sq_perfmon(struct psp_context *psp, uint32_t xcp_id, bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable); bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev); +int amdgpu_psp_reg_program_no_ring(struct psp_context *psp, uint32_t val, + enum psp_reg_prog_id id); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 443409d4f4b0..f40b35f7f679 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1498,6 +1498,9 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, !amdgpu_ras_get_aca_debug_mode(adev)) return -EOPNOTSUPP; + if (amdgpu_sriov_vf(adev)) + return -EOPNOTSUPP; + /* skip ras error reset in gpu reset */ if ((amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) && ((smu_funcs && smu_funcs->set_debug_mode) || @@ -2161,7 +2164,7 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev) /* Fatal error events are handled on host side */ if (amdgpu_sriov_vf(adev)) return; - /** + /* * If the current interrupt is caused by a non-fatal RAS error, skip * check for fatal error. 
For fatal errors, FED status of all devices * in XGMI hive gets set when the first device gets fatal error @@ -3793,10 +3796,12 @@ init_ras_enabled_flag: adev->ras_hw_enabled & amdgpu_ras_mask; /* aca is disabled by default except for psp v13_0_6/v13_0_12/v13_0_14 */ - adev->aca.is_enabled = - (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || - amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || - amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)); + if (!amdgpu_sriov_vf(adev)) { + adev->aca.is_enabled = + (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)); + } /* bad page feature is not applicable to specific app platform */ if (adev->gmc.is_app_apu && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 0ea7cfaf3587..2c58e09e56f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -418,6 +418,7 @@ static void amdgpu_ras_set_eeprom_table_version(struct amdgpu_ras_eeprom_control hdr->version = RAS_TABLE_VER_V2_1; return; case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 5, 0): hdr->version = RAS_TABLE_VER_V3; return; default: @@ -1392,17 +1393,39 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) __decode_table_header_from_buf(hdr, buf); - if (hdr->version >= RAS_TABLE_VER_V2_1) { + if (hdr->header != RAS_TABLE_HDR_VAL && + hdr->header != RAS_TABLE_HDR_BAD) { + dev_info(adev->dev, "Creating a new EEPROM table"); + return amdgpu_ras_eeprom_reset_table(control); + } + + switch (hdr->version) { + case RAS_TABLE_VER_V2_1: + case RAS_TABLE_VER_V3: control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr); control->ras_record_offset = RAS_RECORD_START_V2_1; control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1; - } else { + break; + case RAS_TABLE_VER_V1: control->ras_num_recs = RAS_NUM_RECS(hdr); control->ras_record_offset = RAS_RECORD_START; control->ras_max_record_count = RAS_MAX_RECORD_COUNT; + break; + default: + dev_err(adev->dev, + "RAS header invalid, unsupported version: %u", + hdr->version); + return -EINVAL; } - control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset); + if (control->ras_num_recs > control->ras_max_record_count) { + dev_err(adev->dev, + "RAS header invalid, records in header: %u max allowed :%u", + control->ras_num_recs, control->ras_max_record_count); + return -EINVAL; + } + + control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset); control->ras_num_mca_recs = 0; control->ras_num_pa_recs = 0; return 0; @@ -1413,7 +1436,7 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) struct amdgpu_device *adev = to_amdgpu_device(control); struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - int res; + int res = 0; if (!__is_ras_eeprom_supported(adev)) return 0; @@ -1494,10 +1517,6 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) "User defined threshold is set, runtime service will be halt when threshold is reached\n"); } } - } else { - DRM_INFO("Creating a new EEPROM table"); - - res = amdgpu_ras_eeprom_reset_table(control); } return res < 0 ? 
res : 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 59acdbfe28d8..426834806fbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -187,14 +187,10 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) } #define amdgpu_ring_get_gpu_addr(ring, offset) \ - (ring->is_mes_queue ? \ - (ring->mes_ctx->meta_data_gpu_addr + offset) : \ - (ring->adev->wb.gpu_addr + offset * 4)) + (ring->adev->wb.gpu_addr + offset * 4) #define amdgpu_ring_get_cpu_addr(ring, offset) \ - (ring->is_mes_queue ? \ - (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \ - (&ring->adev->wb.wb[offset])) + (&ring->adev->wb.wb[offset]) /** * amdgpu_ring_init - init driver ring struct. @@ -243,57 +239,42 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ring->sched_score = sched_score; ring->vmid_wait = dma_fence_get_stub(); - if (!ring->is_mes_queue) { - ring->idx = adev->num_rings++; - adev->rings[ring->idx] = ring; - } + ring->idx = adev->num_rings++; + adev->rings[ring->idx] = ring; r = amdgpu_fence_driver_init_ring(ring); if (r) return r; } - if (ring->is_mes_queue) { - ring->rptr_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_RPTR_OFFS); - ring->wptr_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_WPTR_OFFS); - ring->fence_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_FENCE_OFFS); - ring->trail_fence_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_TRAIL_FENCE_OFFS); - ring->cond_exe_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_COND_EXE_OFFS); - } else { - r = amdgpu_device_wb_get(adev, &ring->rptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->rptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->wptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->wptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->fence_offs); - if (r) { - dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->fence_offs); + if (r) { + dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs); - if (r) { - dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs); + if (r) { + dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs); - if (r) { - dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs); + if (r) { + dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r); + return r; } ring->fence_gpu_addr = @@ -353,18 +334,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ring->cached_rptr = 0; /* Allocate ring buffer */ - if (ring->is_mes_queue) { - int offset = 0; - - BUG_ON(ring->ring_size > PAGE_SIZE*4); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_RING_OFFS); - ring->gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ring->ring = 
amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - amdgpu_ring_clear_ring(ring); - - } else if (ring->ring_obj == NULL) { + if (ring->ring_obj == NULL) { r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &ring->ring_obj, @@ -401,32 +371,26 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) { /* Not to finish a ring which is not initialized */ - if (!(ring->adev) || - (!ring->is_mes_queue && !(ring->adev->rings[ring->idx]))) + if (!(ring->adev) || !(ring->adev->rings[ring->idx])) return; ring->sched.ready = false; - if (!ring->is_mes_queue) { - amdgpu_device_wb_free(ring->adev, ring->rptr_offs); - amdgpu_device_wb_free(ring->adev, ring->wptr_offs); + amdgpu_device_wb_free(ring->adev, ring->rptr_offs); + amdgpu_device_wb_free(ring->adev, ring->wptr_offs); - amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs); - amdgpu_device_wb_free(ring->adev, ring->fence_offs); + amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs); + amdgpu_device_wb_free(ring->adev, ring->fence_offs); - amdgpu_bo_free_kernel(&ring->ring_obj, - &ring->gpu_addr, - (void **)&ring->ring); - } else { - kfree(ring->fence_drv.fences); - } + amdgpu_bo_free_kernel(&ring->ring_obj, - &ring->gpu_addr, - (void **)&ring->ring); + &ring->gpu_addr, + (void **)&ring->ring); dma_fence_put(ring->vmid_wait); ring->vmid_wait = NULL; ring->me = 0; - if (!ring->is_mes_queue) - ring->adev->rings[ring->idx] = NULL; + ring->adev->rings[ring->idx] = NULL; } /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index bb2b66385223..b95b47110769 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -164,8 +164,24 @@ void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, /* provided by hw blocks that expose a ring buffer for commands */ struct amdgpu_ring_funcs { + /** + * @type: + * + * GFX, Compute, SDMA, UVD, VCE, VCN, VPE, KIQ, MES, UMSCH, and CPER + * use ring buffers. The type field just identifies which component the + * ring buffer is associated with. + */ enum amdgpu_ring_type type; uint32_t align_mask; + + /** + * @nop: + * + * Every block in amdgpu has its own no-op instruction (e.g., GFX 10 + * uses PACKET3(PACKET3_NOP, 0x3FFF), VCN 5 uses VCN_ENC_CMD_NO_OP, + * etc.). This field holds the specific no-op for the component that + * initializes the ring. + */ u32 nop; bool support_64bit_ptrs; bool no_user_fence; @@ -241,6 +257,9 @@ struct amdgpu_ring_funcs { bool (*is_guilty)(struct amdgpu_ring *ring); }; +/** + * amdgpu_ring - Holds ring information + */ struct amdgpu_ring { struct amdgpu_device *adev; const struct amdgpu_ring_funcs *funcs; @@ -252,13 +271,61 @@ struct amdgpu_ring { unsigned rptr_offs; u64 rptr_gpu_addr; volatile u32 *rptr_cpu_addr; + + /** + * @wptr: + * + * This is part of the ring buffer implementation and represents the + * write pointer. The wptr marks how far the host has written commands + * into the ring. + */ u64 wptr; + + /** + * @wptr_old: + * + * Before wptr is updated with a new value, the previous value is + * usually stored in wptr_old. + */ u64 wptr_old; unsigned ring_size; + + /** + * @max_dw: + * + * Maximum number of DWords for ring allocation. This information is + * provided at ring initialization time, and each IP block can specify + * its own value. Check places that invoke amdgpu_ring_init() to see + * the maximum size per block. + */ unsigned max_dw; + + /** + * @count_dw: + * + * This value starts at the maximum number of DWords supported by the + * ring and is updated as the ring is written to. + */ int count_dw; uint64_t gpu_addr; + + /** + * @ptr_mask: + * + * Some IPs support 64-bit pointers and others only 32-bit; this + * behavior is component-specific and defined by the field + * support_64bit_ptrs. If the IP block supports 64-bit pointers, the + * mask 0xffffffffffffffff is set; otherwise, this value takes the + * value of buf_mask. This field is used to keep wptr within a valid + * range. + */ uint64_t ptr_mask; + + /** + * @buf_mask: + * + * The buffer mask is used to keep the wptr index within the ring + * buffer. It is set at ring buffer initialization time and is defined + * as (ring_size / 4) - 1. + */ uint32_t buf_mask; u32 idx; u32 xcc_id; @@ -276,6 +343,13 @@ struct amdgpu_ring { bool use_pollmem; unsigned wptr_offs; u64 wptr_gpu_addr; + + /** + * @wptr_cpu_addr: + * + * This is the CPU pointer to the wptr writeback slot. It is used to + * commit changes to the GPU. + */ volatile u32 *wptr_cpu_addr; unsigned fence_offs; u64 fence_gpu_addr; @@ -297,20 +371,15 @@ struct amdgpu_ring { struct dma_fence *vmid_wait; bool has_compute_vm_bug; bool no_scheduler; + bool no_user_submission; int hw_prio; unsigned num_hw_submission; atomic_t *sched_score; - /* used for mes */ - bool is_mes_queue; - uint32_t hw_queue_id; - struct amdgpu_mes_ctx_data *mes_ctx; - bool is_sw_ring; unsigned int entry_index; /* store the cached rptr to restore after reset */ uint64_t cached_rptr; - }; #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib))) @@ -435,15 +504,6 @@ static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring, ring->ring[offset] = cur - offset; } -#define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset) \ - (ring->is_mes_queue && ring->mes_ctx ? \ - (ring->mes_ctx->meta_data_gpu_addr + offset) : 0) - -#define amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset) \ - (ring->is_mes_queue && ring->mes_ctx ? \ - (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \ - NULL) - int amdgpu_ring_test_helper(struct amdgpu_ring *ring); void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index fce22d3f816b..c210625be220 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -237,6 +237,20 @@ struct amdgpu_rlc_funcs { void (*unset_safe_mode)(struct amdgpu_device *adev, int xcc_id); int (*init)(struct amdgpu_device *adev); u32 (*get_csb_size)(struct amdgpu_device *adev); + + /** + * @get_csb_buffer: Get the clear state to be put into the hardware. + * + * The parameter adev is used to get the CS data and other gfx info, + * and buffer is the RLC CS pointer. + * + * Sometimes, user space submits a request to clear the state in the + * command buffer; this function provides the clear state that gets put + * into the hardware. Note that the driver programs the Clear State + * Indirect Buffer (CSB) explicitly when it sets up the kernel rings, + * and it also provides a pointer to it, which is used by the firmware + * to load the clear state in some cases. 
+ */ void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer); int (*get_cp_table_num)(struct amdgpu_device *adev); int (*resume)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 529c9696c2f3..6716ac281c49 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -26,6 +26,8 @@ #include "amdgpu_sdma.h" #include "amdgpu_ras.h" #include "amdgpu_reset.h" +#include "gc/gc_10_1_0_offset.h" +#include "gc/gc_10_3_0_sh_mask.h" #define AMDGPU_CSA_SDMA_SIZE 64 /* SDMA CSA reside in the 3rd page of CSA */ @@ -76,22 +78,14 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, if (amdgpu_sriov_vf(adev) || vmid == 0 || !adev->gfx.mcbp) return 0; - if (ring->is_mes_queue) { - uint32_t offset = 0; + r = amdgpu_sdma_get_index_from_ring(ring, &index); - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - sdma[ring->idx].sdma_meta_data); - csa_mc_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - r = amdgpu_sdma_get_index_from_ring(ring, &index); - - if (r || index > 31) - csa_mc_addr = 0; - else - csa_mc_addr = amdgpu_csa_vaddr(adev) + - AMDGPU_CSA_SDMA_OFFSET + - index * AMDGPU_CSA_SDMA_SIZE; - } + if (r || index > 31) + csa_mc_addr = 0; + else + csa_mc_addr = amdgpu_csa_vaddr(adev) + + AMDGPU_CSA_SDMA_OFFSET + + index * AMDGPU_CSA_SDMA_SIZE; return csa_mc_addr; } @@ -537,28 +531,38 @@ bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_rin return false; } -/** - * amdgpu_sdma_register_on_reset_callbacks - Register SDMA reset callbacks - * @funcs: Pointer to the callback structure containing pre_reset and post_reset functions - * - * This function allows KFD and AMDGPU to register their own callbacks for handling - * pre-reset and post-reset operations for engine reset. These are needed because engine - * reset will stop all queues on that engine. 
- */ -void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs) +static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id) { - if (!funcs) - return; - - /* Ensure the reset_callback_list is initialized */ - if (!adev->sdma.reset_callback_list.next) { - INIT_LIST_HEAD(&adev->sdma.reset_callback_list); + struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id]; + int r = -EOPNOTSUPP; + + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(4, 4, 2): + case IP_VERSION(4, 4, 4): + case IP_VERSION(4, 4, 5): + /* For SDMA 4.x, use the existing DPM interface for backward compatibility */ + r = amdgpu_dpm_reset_sdma(adev, 1 << instance_id); + break; + case IP_VERSION(5, 0, 0): + case IP_VERSION(5, 0, 1): + case IP_VERSION(5, 0, 2): + case IP_VERSION(5, 0, 5): + case IP_VERSION(5, 2, 0): + case IP_VERSION(5, 2, 2): + case IP_VERSION(5, 2, 4): + case IP_VERSION(5, 2, 5): + case IP_VERSION(5, 2, 6): + case IP_VERSION(5, 2, 3): + case IP_VERSION(5, 2, 1): + case IP_VERSION(5, 2, 7): + if (sdma_instance->funcs->soft_reset_kernel_queue) + r = sdma_instance->funcs->soft_reset_kernel_queue(adev, instance_id); + break; + default: + break; } - /* Initialize the list node in the callback structure */ - INIT_LIST_HEAD(&funcs->list); - /* Add the callback structure to the global list */ - list_add_tail(&funcs->list, &adev->sdma.reset_callback_list); + return r; } /** @@ -566,16 +570,10 @@ void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct * @adev: Pointer to the AMDGPU device * @instance_id: ID of the SDMA engine instance to reset * - * This function performs the following steps: - * 1. Calls all registered pre_reset callbacks to allow KFD and AMDGPU to save their state. - * 2. Resets the specified SDMA engine instance. - * 3. Calls all registered post_reset callbacks to allow KFD and AMDGPU to restore their state. - * * Returns: 0 on success, or a negative error code on failure. 
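/*
 * Illustrative sketch, not part of the patch: the reworked reset path below
 * drives per-instance function pointers (stop_kernel_queue,
 * soft_reset_kernel_queue, start_kernel_queue) instead of the removed
 * callback registration list. A simplified standalone version of that
 * ops-table pattern, with hypothetical names and signatures:
 */
struct demo_sdma_funcs {
	void (*stop_kernel_queue)(int instance_id);
	int  (*soft_reset_kernel_queue)(int instance_id);
	void (*start_kernel_queue)(int instance_id);
};

static int demo_reset_engine(const struct demo_sdma_funcs *funcs,
			     int instance_id)
{
	int r;

	if (funcs->stop_kernel_queue)
		funcs->stop_kernel_queue(instance_id);

	r = funcs->soft_reset_kernel_queue(instance_id);
	if (r)
		return r;

	if (funcs->start_kernel_queue)
		funcs->start_kernel_queue(instance_id);

	return 0;
}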
*/ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) { - struct sdma_on_reset_funcs *funcs; int ret = 0; struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id]; struct amdgpu_ring *gfx_ring = &sdma_instance->ring; @@ -597,38 +595,18 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) page_sched_stopped = true; } - /* Invoke all registered pre_reset callbacks */ - list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) { - if (funcs->pre_reset) { - ret = funcs->pre_reset(adev, instance_id); - if (ret) { - dev_err(adev->dev, - "beforeReset callback failed for instance %u: %d\n", - instance_id, ret); - goto exit; - } - } - } + if (sdma_instance->funcs->stop_kernel_queue) + sdma_instance->funcs->stop_kernel_queue(gfx_ring); /* Perform the SDMA reset for the specified instance */ - ret = amdgpu_dpm_reset_sdma(adev, 1 << instance_id); + ret = amdgpu_sdma_soft_reset(adev, instance_id); if (ret) { dev_err(adev->dev, "Failed to reset SDMA instance %u\n", instance_id); goto exit; } - /* Invoke all registered post_reset callbacks */ - list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) { - if (funcs->post_reset) { - ret = funcs->post_reset(adev, instance_id); - if (ret) { - dev_err(adev->dev, - "afterReset callback failed for instance %u: %d\n", - instance_id, ret); - goto exit; - } - } - } + if (sdma_instance->funcs->start_kernel_queue) + sdma_instance->funcs->start_kernel_queue(gfx_ring); exit: /* Restart the scheduler's work queue for the GFX and page rings diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 47d56fd0589f..5605921212f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -50,6 +50,12 @@ enum amdgpu_sdma_irq { #define NUM_SDMA(x) hweight32(x) +struct amdgpu_sdma_funcs { + int (*stop_kernel_queue)(struct amdgpu_ring *ring); + int (*start_kernel_queue)(struct amdgpu_ring *ring); + int (*soft_reset_kernel_queue)(struct amdgpu_device *adev, u32 instance_id); +}; + struct amdgpu_sdma_instance { /* SDMA firmware */ const struct firmware *fw; @@ -68,7 +74,7 @@ struct amdgpu_sdma_instance { /* track guilty state of GFX and PAGE queues */ bool gfx_guilty; bool page_guilty; - + const struct amdgpu_sdma_funcs *funcs; }; enum amdgpu_sdma_ras_memory_id { @@ -103,13 +109,6 @@ struct amdgpu_sdma_ras { struct amdgpu_ras_block_object ras_block; }; -struct sdma_on_reset_funcs { - int (*pre_reset)(struct amdgpu_device *adev, uint32_t instance_id); - int (*post_reset)(struct amdgpu_device *adev, uint32_t instance_id); - /* Linked list node to store this structure in a list; */ - struct list_head list; -}; - struct amdgpu_sdma { struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES]; struct amdgpu_irq_src trap_irq; @@ -131,6 +130,8 @@ struct amdgpu_sdma { uint32_t *ip_dump; uint32_t supported_reset; struct list_head reset_callback_list; + bool no_user_submission; + bool disable_uq; }; /* @@ -170,7 +171,6 @@ struct amdgpu_buffer_funcs { uint32_t byte_count); }; -void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs); int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id); #define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index 
e22cb2b5cd92..3939761be31c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -45,7 +45,11 @@ */ static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev) { - return AMDGPU_VA_RESERVED_SEQ64_START(adev); + u64 addr = AMDGPU_VA_RESERVED_SEQ64_START(adev); + + addr = amdgpu_gmc_sign_extend(addr); + + return addr; } /** @@ -63,9 +67,9 @@ static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev) int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va **bo_va) { + u64 seq64_addr, va_flags; struct amdgpu_bo *bo; struct drm_exec exec; - u64 seq64_addr; int r; bo = adev->seq64.sbo; @@ -88,9 +92,11 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, goto error; } - seq64_addr = amdgpu_seq64_get_va_base(adev); + seq64_addr = amdgpu_seq64_get_va_base(adev) & AMDGPU_GMC_HOLE_MASK; + + va_flags = amdgpu_gem_va_map_flags(adev, AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC); r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE, - AMDGPU_PTE_READABLE); + va_flags); if (r) { DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r); amdgpu_vm_bo_del(adev, *bo_va); @@ -156,6 +162,7 @@ error: * * @adev: amdgpu_device pointer * @va: VA to access the seq in process address space + * @gpu_addr: GPU address to access the seq * @cpu_addr: CPU address to access the seq * * Alloc a 64 bit memory from seq64 pool. @@ -163,7 +170,8 @@ error: * Returns: * 0 on success or a negative error code on failure */ -int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 **cpu_addr) +int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, + u64 *gpu_addr, u64 **cpu_addr) { unsigned long bit_pos; @@ -172,7 +180,12 @@ int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 **cpu_addr) return -ENOSPC; __set_bit(bit_pos, adev->seq64.used); + *va = bit_pos * sizeof(u64) + amdgpu_seq64_get_va_base(adev); + + if (gpu_addr) + *gpu_addr = bit_pos * sizeof(u64) + adev->seq64.gpu_addr; + *cpu_addr = bit_pos + adev->seq64.cpu_base_addr; return 0; @@ -233,7 +246,7 @@ int amdgpu_seq64_init(struct amdgpu_device *adev) */ r = amdgpu_bo_create_kernel(adev, AMDGPU_VA_RESERVED_SEQ64_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &adev->seq64.sbo, NULL, + &adev->seq64.sbo, &adev->seq64.gpu_addr, (void **)&adev->seq64.cpu_base_addr); if (r) { dev_warn(adev->dev, "(%d) create seq64 failed\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h index 4203b2ab318d..26a249aaaee1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h @@ -32,13 +32,14 @@ struct amdgpu_seq64 { struct amdgpu_bo *sbo; u32 num_sem; + u64 gpu_addr; u64 *cpu_base_addr; DECLARE_BITMAP(used, AMDGPU_MAX_SEQ64_SLOTS); }; void amdgpu_seq64_fini(struct amdgpu_device *adev); int amdgpu_seq64_init(struct amdgpu_device *adev); -int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, u64 **cpu_addr); +int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 *gpu_addr, u64 **cpu_addr); void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr); int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va **bo_va); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 5576ed0b508f..d6ae9974c952 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -249,9 +249,8 @@ 
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, if (resv == NULL) return -EINVAL; - - /* TODO: Use DMA_RESV_USAGE_READ here */ - dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) { + /* Implicitly sync only to KERNEL, WRITE and READ */ + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) { dma_fence_chain_for_each(f, f) { struct dma_fence *tmp = dma_fence_chain_contained(f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 4eedd92f000b..06fe21e15ed6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -25,6 +25,8 @@ #include "amdgpu_socbb.h" +#define RS64_FW_UC_START_ADDR_LO 0x3000 + struct common_firmware_header { uint32_t size_bytes; /* size of the entire header+image(s) in bytes */ uint32_t header_size_bytes; /* size of just the header in bytes */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c new file mode 100644 index 000000000000..394b036be1d9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -0,0 +1,919 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include <drm/drm_auth.h> +#include <drm/drm_exec.h> +#include <linux/pm_runtime.h> + +#include "amdgpu.h" +#include "amdgpu_vm.h" +#include "amdgpu_userq.h" +#include "amdgpu_userq_fence.h" + +u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev) +{ + int i; + u32 userq_ip_mask = 0; + + for (i = 0; i < AMDGPU_HW_IP_NUM; i++) { + if (adev->userq_funcs[i]) + userq_ip_mask |= (1 << i); + } + + return userq_ip_mask; +} + +static int +amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *userq_funcs = + adev->userq_funcs[queue->queue_type]; + int r = 0; + + if (queue->state == AMDGPU_USERQ_STATE_MAPPED) { + r = userq_funcs->unmap(uq_mgr, queue); + if (r) + queue->state = AMDGPU_USERQ_STATE_HUNG; + else + queue->state = AMDGPU_USERQ_STATE_UNMAPPED; + } + return r; +} + +static int +amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *userq_funcs = + adev->userq_funcs[queue->queue_type]; + int r = 0; + + if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) { + r = userq_funcs->map(uq_mgr, queue); + if (r) { + queue->state = AMDGPU_USERQ_STATE_HUNG; + } else { + queue->state = AMDGPU_USERQ_STATE_MAPPED; + } + } + return r; +} + +static void +amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct dma_fence *f = queue->last_fence; + int ret; + + if (f && !dma_fence_is_signaled(f)) { + ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); + if (ret <= 0) + drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n", + f->context, f->seqno); + } +} + +static void +amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + int queue_id) +{ + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type]; + + uq_funcs->mqd_destroy(uq_mgr, queue); + amdgpu_userq_fence_driver_free(queue); + idr_remove(&uq_mgr->userq_idr, queue_id); + kfree(queue); +} + +int +amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_usermode_queue *queue; + int queue_id; + int ret = 0; + + mutex_lock(&uq_mgr->userq_mutex); + /* Resume all the queues for this process */ + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) + ret += queue->state == AMDGPU_USERQ_STATE_MAPPED; + + mutex_unlock(&uq_mgr->userq_mutex); + return ret; +} + +static struct amdgpu_usermode_queue * +amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid) +{ + return idr_find(&uq_mgr->userq_idr, qid); +} + +void +amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + +retry: + /* Flush any pending resume work to create ev_fence */ + flush_delayed_work(&uq_mgr->resume_work); + + mutex_lock(&uq_mgr->userq_mutex); + spin_lock(&evf_mgr->ev_fence_lock); + ev_fence = evf_mgr->ev_fence; + spin_unlock(&evf_mgr->ev_fence_lock); + if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) { + mutex_unlock(&uq_mgr->userq_mutex); + /* + * Looks like there was no pending resume work, + * add one now to create a valid eviction fence + */ + schedule_delayed_work(&uq_mgr->resume_work, 0); + goto retry; + } +} + +int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_userq_obj 
*userq_obj, + int size) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_bo_param bp; + int r; + + memset(&bp, 0, sizeof(bp)); + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + bp.type = ttm_bo_type_kernel; + bp.size = size; + bp.resv = NULL; + bp.bo_ptr_size = sizeof(struct amdgpu_bo); + + r = amdgpu_bo_create(adev, &bp, &userq_obj->obj); + if (r) { + drm_file_err(uq_mgr->file, "Failed to allocate BO for userqueue (%d)", r); + return r; + } + + r = amdgpu_bo_reserve(userq_obj->obj, true); + if (r) { + drm_file_err(uq_mgr->file, "Failed to reserve BO to map (%d)", r); + goto free_obj; + } + + r = amdgpu_ttm_alloc_gart(&(userq_obj->obj)->tbo); + if (r) { + drm_file_err(uq_mgr->file, "Failed to alloc GART for userqueue object (%d)", r); + goto unresv; + } + + r = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr); + if (r) { + drm_file_err(uq_mgr->file, "Failed to map BO for userqueue (%d)", r); + goto unresv; + } + + userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj); + amdgpu_bo_unreserve(userq_obj->obj); + memset(userq_obj->cpu_ptr, 0, size); + return 0; + +unresv: + amdgpu_bo_unreserve(userq_obj->obj); + +free_obj: + amdgpu_bo_unref(&userq_obj->obj); + return r; +} + +void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_userq_obj *userq_obj) +{ + amdgpu_bo_kunmap(userq_obj->obj); + amdgpu_bo_unref(&userq_obj->obj); +} + +uint64_t +amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_db_info *db_info, + struct drm_file *filp) +{ + uint64_t index; + struct drm_gem_object *gobj; + struct amdgpu_userq_obj *db_obj = db_info->db_obj; + int r, db_size; + + gobj = drm_gem_object_lookup(filp, db_info->doorbell_handle); + if (gobj == NULL) { + drm_file_err(uq_mgr->file, "Can't find GEM object for doorbell\n"); + return -EINVAL; + } + + db_obj->obj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); + drm_gem_object_put(gobj); + + /* Pin the BO before generating the index, unpin in queue destroy */ + r = amdgpu_bo_pin(db_obj->obj, AMDGPU_GEM_DOMAIN_DOORBELL); + if (r) { + drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n"); + goto unref_bo; + } + + r = amdgpu_bo_reserve(db_obj->obj, true); + if (r) { + drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n"); + goto unpin_bo; + } + + switch (db_info->queue_type) { + case AMDGPU_HW_IP_GFX: + case AMDGPU_HW_IP_COMPUTE: + case AMDGPU_HW_IP_DMA: + db_size = sizeof(u64); + break; + + case AMDGPU_HW_IP_VCN_ENC: + db_size = sizeof(u32); + db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1; + break; + + case AMDGPU_HW_IP_VPE: + db_size = sizeof(u32); + db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VPE << 1; + break; + + default: + drm_file_err(uq_mgr->file, "[Usermode queues] IP %d not support\n", + db_info->queue_type); + r = -EINVAL; + goto unpin_bo; + } + + index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj, + db_info->doorbell_offset, db_size); + drm_dbg_driver(adev_to_drm(uq_mgr->adev), + "[Usermode queues] doorbell index=%lld\n", index); + amdgpu_bo_unreserve(db_obj->obj); + return index; + +unpin_bo: + amdgpu_bo_unpin(db_obj->obj); + +unref_bo: + amdgpu_bo_unref(&db_obj->obj); + return r; +} + +static int +amdgpu_userq_destroy(struct drm_file *filp, int queue_id) +{ + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; + struct amdgpu_device 
*adev = uq_mgr->adev; + struct amdgpu_usermode_queue *queue; + int r = 0; + + cancel_delayed_work(&uq_mgr->resume_work); + mutex_lock(&uq_mgr->userq_mutex); + + queue = amdgpu_userq_find(uq_mgr, queue_id); + if (!queue) { + drm_dbg_driver(adev_to_drm(uq_mgr->adev), "Invalid queue id to destroy\n"); + mutex_unlock(&uq_mgr->userq_mutex); + return -EINVAL; + } + amdgpu_userq_wait_for_last_fence(uq_mgr, queue); + r = amdgpu_userq_unmap_helper(uq_mgr, queue); + amdgpu_bo_unpin(queue->db_obj.obj); + amdgpu_bo_unref(&queue->db_obj.obj); + amdgpu_userq_cleanup(uq_mgr, queue, queue_id); + mutex_unlock(&uq_mgr->userq_mutex); + + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + +static int amdgpu_userq_priority_permit(struct drm_file *filp, + int priority) +{ + if (priority < AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH) + return 0; + + if (capable(CAP_SYS_NICE)) + return 0; + + if (drm_is_current_master(filp)) + return 0; + + return -EACCES; +} + +static int +amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) +{ + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *uq_funcs; + struct amdgpu_usermode_queue *queue; + struct amdgpu_db_info db_info; + bool skip_map_queue; + uint64_t index; + int qid, r = 0; + int priority = + (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >> + AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT; + + /* Usermode queues are only supported for GFX IP as of now */ + if (args->in.ip_type != AMDGPU_HW_IP_GFX && + args->in.ip_type != AMDGPU_HW_IP_DMA && + args->in.ip_type != AMDGPU_HW_IP_COMPUTE) { + drm_file_err(uq_mgr->file, "Usermode queue doesn't support IP type %u\n", + args->in.ip_type); + return -EINVAL; + } + + r = amdgpu_userq_priority_permit(filp, priority); + if (r) + return r; + + if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) && + (args->in.ip_type != AMDGPU_HW_IP_GFX) && + (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) && + !amdgpu_is_tmz(adev)) { + drm_file_err(uq_mgr->file, "Secure only supported on GFX/Compute queues\n"); + return -EINVAL; + } + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + drm_file_err(uq_mgr->file, "pm_runtime_get_sync() failed for userqueue create\n"); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + /* + * There could be a situation that we are creating a new queue while + * the other queues under this UQ_mgr are suspended. So if there is any + * resume work pending, wait for it to get done. + * + * This will also make sure we have a valid eviction fence ready to be used. 
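/*
 * Illustrative sketch, not part of the patch: the retry pattern used by
 * amdgpu_userq_ensure_ev_fence() earlier in this file: flush any pending
 * resume work, re-check the eviction fence under the lock, and if it is
 * missing or already signaled, drop the lock, schedule the work and try
 * again. The demo_* helpers below are hypothetical stand-ins for the
 * driver objects.
 */
#include <stdbool.h>

struct demo_uq_mgr;

void demo_flush_resume_work(struct demo_uq_mgr *mgr);
void demo_schedule_resume_work(struct demo_uq_mgr *mgr);
bool demo_ev_fence_valid(struct demo_uq_mgr *mgr);
void demo_lock(struct demo_uq_mgr *mgr);
void demo_unlock(struct demo_uq_mgr *mgr);

/* Returns with the manager locked and a valid eviction fence installed. */
static void demo_ensure_ev_fence(struct demo_uq_mgr *mgr)
{
	for (;;) {
		demo_flush_resume_work(mgr);
		demo_lock(mgr);
		if (demo_ev_fence_valid(mgr))
			return;
		demo_unlock(mgr);
		demo_schedule_resume_work(mgr);
	}
}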
+ */ + amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); + + uq_funcs = adev->userq_funcs[args->in.ip_type]; + if (!uq_funcs) { + drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n", + args->in.ip_type); + r = -EINVAL; + goto unlock; + } + + queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL); + if (!queue) { + drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n"); + r = -ENOMEM; + goto unlock; + } + queue->doorbell_handle = args->in.doorbell_handle; + queue->queue_type = args->in.ip_type; + queue->vm = &fpriv->vm; + queue->priority = priority; + + db_info.queue_type = queue->queue_type; + db_info.doorbell_handle = queue->doorbell_handle; + db_info.db_obj = &queue->db_obj; + db_info.doorbell_offset = args->in.doorbell_offset; + + /* Convert relative doorbell offset into absolute doorbell index */ + index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp); + if (index == (uint64_t)-EINVAL) { + drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n"); + kfree(queue); + goto unlock; + } + + queue->doorbell_index = index; + xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); + r = amdgpu_userq_fence_driver_alloc(adev, queue); + if (r) { + drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n"); + goto unlock; + } + + r = uq_funcs->mqd_create(uq_mgr, &args->in, queue); + if (r) { + drm_file_err(uq_mgr->file, "Failed to create Queue\n"); + amdgpu_userq_fence_driver_free(queue); + kfree(queue); + goto unlock; + } + + + qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL); + if (qid < 0) { + drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n"); + amdgpu_userq_fence_driver_free(queue); + uq_funcs->mqd_destroy(uq_mgr, queue); + kfree(queue); + r = -ENOMEM; + goto unlock; + } + + /* don't map the queue if scheduling is halted */ + mutex_lock(&adev->userq_mutex); + if (adev->userq_halt_for_enforce_isolation && + ((queue->queue_type == AMDGPU_HW_IP_GFX) || + (queue->queue_type == AMDGPU_HW_IP_COMPUTE))) + skip_map_queue = true; + else + skip_map_queue = false; + if (!skip_map_queue) { + r = amdgpu_userq_map_helper(uq_mgr, queue); + if (r) { + mutex_unlock(&adev->userq_mutex); + drm_file_err(uq_mgr->file, "Failed to map Queue\n"); + idr_remove(&uq_mgr->userq_idr, qid); + amdgpu_userq_fence_driver_free(queue); + uq_funcs->mqd_destroy(uq_mgr, queue); + kfree(queue); + goto unlock; + } + } + mutex_unlock(&adev->userq_mutex); + + + args->out.queue_id = qid; + +unlock: + mutex_unlock(&uq_mgr->userq_mutex); + + return r; +} + +int amdgpu_userq_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + union drm_amdgpu_userq *args = data; + int r; + + switch (args->in.op) { + case AMDGPU_USERQ_OP_CREATE: + if (args->in.flags & ~(AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK | + AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE)) + return -EINVAL; + r = amdgpu_userq_create(filp, args); + if (r) + drm_file_err(filp, "Failed to create usermode queue\n"); + break; + + case AMDGPU_USERQ_OP_FREE: + if (args->in.ip_type || + args->in.doorbell_handle || + args->in.doorbell_offset || + args->in.flags || + args->in.queue_va || + args->in.queue_size || + args->in.rptr_va || + args->in.wptr_va || + args->in.wptr_va || + args->in.mqd || + args->in.mqd_size) + return -EINVAL; + r = amdgpu_userq_destroy(filp, args->in.queue_id); + if (r) + drm_file_err(filp, "Failed to destroy usermode queue\n"); + break; + + default: + drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op); 
+ return -EINVAL; + } + + return r; +} + +static int +amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_usermode_queue *queue; + int queue_id; + int ret = 0, r; + + /* Resume all the queues for this process */ + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + r = amdgpu_userq_map_helper(uq_mgr, queue); + if (r) + ret = r; + } + + if (ret) + drm_file_err(uq_mgr->file, "Failed to map all the queues\n"); + return ret; +} + +static int +amdgpu_userq_validate_vm_bo(void *_unused, struct amdgpu_bo *bo) +{ + struct ttm_operation_ctx ctx = { false, false }; + int ret; + + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + DRM_ERROR("Fail to validate\n"); + + return ret; +} + +static int +amdgpu_userq_validate_bos(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); + struct amdgpu_vm *vm = &fpriv->vm; + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_bo_va *bo_va; + struct ww_acquire_ctx *ticket; + struct drm_exec exec; + struct amdgpu_bo *bo; + struct dma_resv *resv; + bool clear, unlock; + int ret = 0; + + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); + drm_exec_until_all_locked(&exec) { + ret = amdgpu_vm_lock_pd(vm, &exec, 2); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) { + drm_file_err(uq_mgr->file, "Failed to lock PD\n"); + goto unlock_all; + } + + /* Lock the done list */ + list_for_each_entry(bo_va, &vm->done, base.vm_status) { + bo = bo_va->base.bo; + if (!bo) + continue; + + ret = drm_exec_lock_obj(&exec, &bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto unlock_all; + } + } + + spin_lock(&vm->status_lock); + while (!list_empty(&vm->moved)) { + bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va, + base.vm_status); + spin_unlock(&vm->status_lock); + + /* Per VM BOs never need to bo cleared in the page tables */ + ret = amdgpu_vm_bo_update(adev, bo_va, false); + if (ret) + goto unlock_all; + spin_lock(&vm->status_lock); + } + + ticket = &exec.ticket; + while (!list_empty(&vm->invalidated)) { + bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, + base.vm_status); + resv = bo_va->base.bo->tbo.base.resv; + spin_unlock(&vm->status_lock); + + bo = bo_va->base.bo; + ret = amdgpu_userq_validate_vm_bo(NULL, bo); + if (ret) { + drm_file_err(uq_mgr->file, "Failed to validate BO\n"); + goto unlock_all; + } + + /* Try to reserve the BO to avoid clearing its ptes */ + if (!adev->debug_vm && dma_resv_trylock(resv)) { + clear = false; + unlock = true; + /* The caller is already holding the reservation lock */ + } else if (dma_resv_locking_ctx(resv) == ticket) { + clear = false; + unlock = false; + /* Somebody else is using the BO right now */ + } else { + clear = true; + unlock = false; + } + + ret = amdgpu_vm_bo_update(adev, bo_va, clear); + + if (unlock) + dma_resv_unlock(resv); + if (ret) + goto unlock_all; + + spin_lock(&vm->status_lock); + } + spin_unlock(&vm->status_lock); + + ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec); + if (ret) + drm_file_err(uq_mgr->file, "Failed to replace eviction fence\n"); + +unlock_all: + drm_exec_fini(&exec); + return ret; +} + +static void amdgpu_userq_restore_worker(struct work_struct *work) +{ + struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work); + struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); + int ret; + + flush_work(&fpriv->evf_mgr.suspend_work.work); + + 
mutex_lock(&uq_mgr->userq_mutex); + + ret = amdgpu_userq_validate_bos(uq_mgr); + if (ret) { + drm_file_err(uq_mgr->file, "Failed to validate BOs to restore\n"); + goto unlock; + } + + ret = amdgpu_userq_restore_all(uq_mgr); + if (ret) { + drm_file_err(uq_mgr->file, "Failed to restore all queues\n"); + goto unlock; + } + +unlock: + mutex_unlock(&uq_mgr->userq_mutex); +} + +static int +amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_usermode_queue *queue; + int queue_id; + int ret = 0, r; + + /* Try to unmap all the queues in this process ctx */ + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + r = amdgpu_userq_unmap_helper(uq_mgr, queue); + if (r) + ret = r; + } + + if (ret) + drm_file_err(uq_mgr->file, "Couldn't unmap all the queues\n"); + return ret; +} + +static int +amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_usermode_queue *queue; + int queue_id, ret; + + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + struct dma_fence *f = queue->last_fence; + + if (!f || dma_fence_is_signaled(f)) + continue; + ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); + if (ret <= 0) { + drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n", + f->context, f->seqno); + return -ETIMEDOUT; + } + } + + return 0; +} + +void +amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_eviction_fence *ev_fence) +{ + int ret; + struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); + struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr; + + /* Wait for any pending userqueue fence work to finish */ + ret = amdgpu_userq_wait_for_signal(uq_mgr); + if (ret) { + drm_file_err(uq_mgr->file, "Not evicting userqueue, timeout waiting for work\n"); + return; + } + + ret = amdgpu_userq_evict_all(uq_mgr); + if (ret) { + drm_file_err(uq_mgr->file, "Failed to evict userqueue\n"); + return; + } + + /* Signal current eviction fence */ + amdgpu_eviction_fence_signal(evf_mgr, ev_fence); + + if (evf_mgr->fd_closing) { + cancel_delayed_work(&uq_mgr->resume_work); + return; + } + + /* Schedule a resume work */ + schedule_delayed_work(&uq_mgr->resume_work, 0); +} + +int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv, + struct amdgpu_device *adev) +{ + mutex_init(&userq_mgr->userq_mutex); + idr_init_base(&userq_mgr->userq_idr, 1); + userq_mgr->adev = adev; + userq_mgr->file = file_priv; + + mutex_lock(&adev->userq_mutex); + list_add(&userq_mgr->list, &adev->userq_mgr_list); + mutex_unlock(&adev->userq_mutex); + + INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker); + return 0; +} + +void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) +{ + struct amdgpu_device *adev = userq_mgr->adev; + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + uint32_t queue_id; + + cancel_delayed_work(&userq_mgr->resume_work); + + mutex_lock(&userq_mgr->userq_mutex); + idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) { + amdgpu_userq_wait_for_last_fence(userq_mgr, queue); + amdgpu_userq_unmap_helper(userq_mgr, queue); + amdgpu_userq_cleanup(userq_mgr, queue, queue_id); + } + mutex_lock(&adev->userq_mutex); + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + if (uqm == userq_mgr) { + list_del(&uqm->list); + break; + } + } + mutex_unlock(&adev->userq_mutex); + idr_destroy(&userq_mgr->userq_idr); + mutex_unlock(&userq_mgr->userq_mutex); + mutex_destroy(&userq_mgr->userq_mutex); +} + +int amdgpu_userq_suspend(struct 
amdgpu_device *adev) +{ + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0, r; + + if (!ip_mask) + return 0; + + mutex_lock(&adev->userq_mutex); + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + cancel_delayed_work_sync(&uqm->resume_work); + mutex_lock(&uqm->userq_mutex); + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + r = amdgpu_userq_unmap_helper(uqm, queue); + if (r) + ret = r; + } + mutex_unlock(&uqm->userq_mutex); + } + mutex_unlock(&adev->userq_mutex); + return ret; +} + +int amdgpu_userq_resume(struct amdgpu_device *adev) +{ + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0, r; + + if (!ip_mask) + return 0; + + mutex_lock(&adev->userq_mutex); + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + mutex_lock(&uqm->userq_mutex); + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + r = amdgpu_userq_map_helper(uqm, queue); + if (r) + ret = r; + } + mutex_unlock(&uqm->userq_mutex); + } + mutex_unlock(&adev->userq_mutex); + return ret; +} + +int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, + u32 idx) +{ + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0, r; + + /* only need to stop gfx/compute */ + if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE)))) + return 0; + + mutex_lock(&adev->userq_mutex); + if (adev->userq_halt_for_enforce_isolation) + dev_warn(adev->dev, "userq scheduling already stopped!\n"); + adev->userq_halt_for_enforce_isolation = true; + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + cancel_delayed_work_sync(&uqm->resume_work); + mutex_lock(&uqm->userq_mutex); + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + if (((queue->queue_type == AMDGPU_HW_IP_GFX) || + (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && + (queue->xcp_id == idx)) { + r = amdgpu_userq_unmap_helper(uqm, queue); + if (r) + ret = r; + } + } + mutex_unlock(&uqm->userq_mutex); + } + mutex_unlock(&adev->userq_mutex); + return ret; +} + +int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, + u32 idx) +{ + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0, r; + + /* only need to stop gfx/compute */ + if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE)))) + return 0; + + mutex_lock(&adev->userq_mutex); + if (!adev->userq_halt_for_enforce_isolation) + dev_warn(adev->dev, "userq scheduling already started!\n"); + adev->userq_halt_for_enforce_isolation = false; + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + mutex_lock(&uqm->userq_mutex); + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + if (((queue->queue_type == AMDGPU_HW_IP_GFX) || + (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && + (queue->xcp_id == idx)) { + r = amdgpu_userq_map_helper(uqm, queue); + if (r) + ret = r; + } + } + mutex_unlock(&uqm->userq_mutex); + } + mutex_unlock(&adev->userq_mutex); + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h new file mode 100644 index 000000000000..ec040c2fd6c9 --- /dev/null +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef AMDGPU_USERQ_H_ +#define AMDGPU_USERQ_H_ +#include "amdgpu_eviction_fence.h" + +#define AMDGPU_MAX_USERQ_COUNT 512 + +#define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base) +#define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr) +#define work_to_uq_mgr(w, name) container_of(w, struct amdgpu_userq_mgr, name) + +enum amdgpu_userq_state { + AMDGPU_USERQ_STATE_UNMAPPED = 0, + AMDGPU_USERQ_STATE_MAPPED, + AMDGPU_USERQ_STATE_PREEMPTED, + AMDGPU_USERQ_STATE_HUNG, +}; + +struct amdgpu_mqd_prop; + +struct amdgpu_userq_obj { + void *cpu_ptr; + uint64_t gpu_addr; + struct amdgpu_bo *obj; +}; + +struct amdgpu_usermode_queue { + int queue_type; + enum amdgpu_userq_state state; + uint64_t doorbell_handle; + uint64_t doorbell_index; + uint64_t flags; + struct amdgpu_mqd_prop *userq_prop; + struct amdgpu_userq_mgr *userq_mgr; + struct amdgpu_vm *vm; + struct amdgpu_userq_obj mqd; + struct amdgpu_userq_obj db_obj; + struct amdgpu_userq_obj fw_obj; + struct amdgpu_userq_obj wptr_obj; + struct xarray fence_drv_xa; + struct amdgpu_userq_fence_driver *fence_drv; + struct dma_fence *last_fence; + u32 xcp_id; + int priority; +}; + +struct amdgpu_userq_funcs { + int (*mqd_create)(struct amdgpu_userq_mgr *uq_mgr, + struct drm_amdgpu_userq_in *args, + struct amdgpu_usermode_queue *queue); + void (*mqd_destroy)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *uq); + int (*unmap)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue); + int (*map)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue); +}; + +/* Usermode queues for gfx */ +struct amdgpu_userq_mgr { + struct idr userq_idr; + struct mutex userq_mutex; + struct amdgpu_device *adev; + struct delayed_work resume_work; + struct list_head list; + struct drm_file *file; +}; + +struct amdgpu_db_info { + uint64_t doorbell_handle; + uint32_t queue_type; + uint32_t doorbell_offset; + struct amdgpu_userq_obj *db_obj; +}; + +int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); + +int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv, + struct amdgpu_device *adev); + +void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr); + +int amdgpu_userq_create_object(struct 
amdgpu_userq_mgr *uq_mgr, + struct amdgpu_userq_obj *userq_obj, + int size); + +void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_userq_obj *userq_obj); + +void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_eviction_fence *ev_fence); + +int amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr); + +void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr, + struct amdgpu_eviction_fence_mgr *evf_mgr); + +uint64_t amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_db_info *db_info, + struct drm_file *filp); + +u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev); + +int amdgpu_userq_suspend(struct amdgpu_device *adev); +int amdgpu_userq_resume(struct amdgpu_device *adev); + +int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, + u32 idx); +int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, + u32 idx); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c new file mode 100644 index 000000000000..029cb24c28b3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -0,0 +1,966 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include <linux/kref.h> +#include <linux/slab.h> +#include <linux/dma-fence-unwrap.h> + +#include <drm/drm_exec.h> +#include <drm/drm_syncobj.h> + +#include "amdgpu.h" +#include "amdgpu_userq_fence.h" + +static const struct dma_fence_ops amdgpu_userq_fence_ops; +static struct kmem_cache *amdgpu_userq_fence_slab; + +int amdgpu_userq_fence_slab_init(void) +{ + amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence", + sizeof(struct amdgpu_userq_fence), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!amdgpu_userq_fence_slab) + return -ENOMEM; + + return 0; +} + +void amdgpu_userq_fence_slab_fini(void) +{ + rcu_barrier(); + kmem_cache_destroy(amdgpu_userq_fence_slab); +} + +static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f) +{ + if (!f || f->ops != &amdgpu_userq_fence_ops) + return NULL; + + return container_of(f, struct amdgpu_userq_fence, base); +} + +static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv) +{ + return le64_to_cpu(*fence_drv->cpu_addr); +} + +int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, + struct amdgpu_usermode_queue *userq) +{ + struct amdgpu_userq_fence_driver *fence_drv; + unsigned long flags; + int r; + + fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL); + if (!fence_drv) + return -ENOMEM; + + /* Acquire seq64 memory */ + r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr, + &fence_drv->cpu_addr); + if (r) + goto free_fence_drv; + + memset(fence_drv->cpu_addr, 0, sizeof(u64)); + + kref_init(&fence_drv->refcount); + INIT_LIST_HEAD(&fence_drv->fences); + spin_lock_init(&fence_drv->fence_list_lock); + + fence_drv->adev = adev; + fence_drv->context = dma_fence_context_alloc(1); + get_task_comm(fence_drv->timeline_name, current); + + xa_lock_irqsave(&adev->userq_xa, flags); + r = xa_err(__xa_store(&adev->userq_xa, userq->doorbell_index, + fence_drv, GFP_KERNEL)); + xa_unlock_irqrestore(&adev->userq_xa, flags); + if (r) + goto free_seq64; + + userq->fence_drv = fence_drv; + + return 0; + +free_seq64: + amdgpu_seq64_free(adev, fence_drv->va); +free_fence_drv: + kfree(fence_drv); + + return r; +} + +static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa) +{ + struct amdgpu_userq_fence_driver *fence_drv; + unsigned long index; + + if (xa_empty(xa)) + return; + + xa_lock(xa); + xa_for_each(xa, index, fence_drv) { + __xa_erase(xa, index); + amdgpu_userq_fence_driver_put(fence_drv); + } + + xa_unlock(xa); +} + +void +amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq) +{ + amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa); + xa_destroy(&userq->fence_drv_xa); + /* Drop the fence_drv reference held by user queue */ + amdgpu_userq_fence_driver_put(userq->fence_drv); +} + +void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv) +{ + struct amdgpu_userq_fence *userq_fence, *tmp; + struct dma_fence *fence; + u64 rptr; + int i; + + if (!fence_drv) + return; + + rptr = amdgpu_userq_fence_read(fence_drv); + + spin_lock(&fence_drv->fence_list_lock); + list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) { + fence = &userq_fence->base; + + if (rptr < fence->seqno) + break; + + dma_fence_signal(fence); + + for (i = 0; i < userq_fence->fence_drv_array_count; i++) + amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]); + + list_del(&userq_fence->link); + dma_fence_put(fence); + } + spin_unlock(&fence_drv->fence_list_lock); +} + +void amdgpu_userq_fence_driver_destroy(struct kref *ref) +{ + struct 
amdgpu_userq_fence_driver *fence_drv = container_of(ref, + struct amdgpu_userq_fence_driver, + refcount); + struct amdgpu_userq_fence_driver *xa_fence_drv; + struct amdgpu_device *adev = fence_drv->adev; + struct amdgpu_userq_fence *fence, *tmp; + struct xarray *xa = &adev->userq_xa; + unsigned long index, flags; + struct dma_fence *f; + + spin_lock(&fence_drv->fence_list_lock); + list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) { + f = &fence->base; + + if (!dma_fence_is_signaled(f)) { + dma_fence_set_error(f, -ECANCELED); + dma_fence_signal(f); + } + + list_del(&fence->link); + dma_fence_put(f); + } + spin_unlock(&fence_drv->fence_list_lock); + + xa_lock_irqsave(xa, flags); + xa_for_each(xa, index, xa_fence_drv) + if (xa_fence_drv == fence_drv) + __xa_erase(xa, index); + xa_unlock_irqrestore(xa, flags); + + /* Free seq64 memory */ + amdgpu_seq64_free(adev, fence_drv->va); + kfree(fence_drv); +} + +void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv) +{ + kref_get(&fence_drv->refcount); +} + +void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv) +{ + kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy); +} + +static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence) +{ + *userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC); + return *userq_fence ? 0 : -ENOMEM; +} + +static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, + struct amdgpu_userq_fence *userq_fence, + u64 seq, struct dma_fence **f) +{ + struct amdgpu_userq_fence_driver *fence_drv; + struct dma_fence *fence; + unsigned long flags; + + fence_drv = userq->fence_drv; + if (!fence_drv) + return -EINVAL; + + spin_lock_init(&userq_fence->lock); + INIT_LIST_HEAD(&userq_fence->link); + fence = &userq_fence->base; + userq_fence->fence_drv = fence_drv; + + dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock, + fence_drv->context, seq); + + amdgpu_userq_fence_driver_get(fence_drv); + dma_fence_get(fence); + + if (!xa_empty(&userq->fence_drv_xa)) { + struct amdgpu_userq_fence_driver *stored_fence_drv; + unsigned long index, count = 0; + int i = 0; + + xa_lock(&userq->fence_drv_xa); + xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) + count++; + + userq_fence->fence_drv_array = + kvmalloc_array(count, + sizeof(struct amdgpu_userq_fence_driver *), + GFP_ATOMIC); + + if (userq_fence->fence_drv_array) { + xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) { + userq_fence->fence_drv_array[i] = stored_fence_drv; + __xa_erase(&userq->fence_drv_xa, index); + i++; + } + } + + userq_fence->fence_drv_array_count = i; + xa_unlock(&userq->fence_drv_xa); + } else { + userq_fence->fence_drv_array = NULL; + userq_fence->fence_drv_array_count = 0; + } + + /* Check if hardware has already processed the job */ + spin_lock_irqsave(&fence_drv->fence_list_lock, flags); + if (!dma_fence_is_signaled_locked(fence)) + list_add_tail(&userq_fence->link, &fence_drv->fences); + else + dma_fence_put(fence); + + spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); + + *f = fence; + + return 0; +} + +static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f) +{ + return "amdgpu_userq_fence"; +} + +static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f) +{ + struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f); + + return fence->fence_drv->timeline_name; +} + +static bool amdgpu_userq_fence_signaled(struct dma_fence *f) +{ + struct amdgpu_userq_fence *fence = 
to_amdgpu_userq_fence(f); + struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv; + u64 rptr, wptr; + + rptr = amdgpu_userq_fence_read(fence_drv); + wptr = fence->base.seqno; + + if (rptr >= wptr) + return true; + + return false; +} + +static void amdgpu_userq_fence_free(struct rcu_head *rcu) +{ + struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu); + struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence); + struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv; + + /* Release the fence driver reference */ + amdgpu_userq_fence_driver_put(fence_drv); + + kvfree(userq_fence->fence_drv_array); + kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); +} + +static void amdgpu_userq_fence_release(struct dma_fence *f) +{ + call_rcu(&f->rcu, amdgpu_userq_fence_free); +} + +static const struct dma_fence_ops amdgpu_userq_fence_ops = { + .use_64bit_seqno = true, + .get_driver_name = amdgpu_userq_fence_get_driver_name, + .get_timeline_name = amdgpu_userq_fence_get_timeline_name, + .signaled = amdgpu_userq_fence_signaled, + .release = amdgpu_userq_fence_release, +}; + +/** + * amdgpu_userq_fence_read_wptr - Read the userq wptr value + * + * @queue: user mode queue structure pointer + * @wptr: write pointer value + * + * Read the wptr value from userq's MQD. The userq signal IOCTL + * creates a dma_fence for the shared buffers that expects the + * RPTR value written to seq64 memory >= WPTR. + * + * Returns wptr value on success, error on failure. + */ +static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue, + u64 *wptr) +{ + struct amdgpu_bo_va_mapping *mapping; + struct amdgpu_bo *bo; + u64 addr, *ptr; + int r; + + r = amdgpu_bo_reserve(queue->vm->root.bo, false); + if (r) + return r; + + addr = queue->userq_prop->wptr_gpu_addr; + addr &= AMDGPU_GMC_HOLE_MASK; + + mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT); + if (!mapping) { + amdgpu_bo_unreserve(queue->vm->root.bo); + DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n"); + return -EINVAL; + } + + bo = amdgpu_bo_ref(mapping->bo_va->base.bo); + amdgpu_bo_unreserve(queue->vm->root.bo); + r = amdgpu_bo_reserve(bo, true); + if (r) { + DRM_ERROR("Failed to reserve userqueue wptr bo"); + return r; + } + + r = amdgpu_bo_kmap(bo, (void **)&ptr); + if (r) { + DRM_ERROR("Failed mapping the userqueue wptr bo"); + goto map_error; + } + + *wptr = le64_to_cpu(*ptr); + + amdgpu_bo_kunmap(bo); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + + return 0; + +map_error: + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + + return r; +} + +static void amdgpu_userq_fence_cleanup(struct dma_fence *fence) +{ + dma_fence_put(fence); +} + +int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; + struct drm_amdgpu_userq_signal *args = data; + struct drm_gem_object **gobj_write = NULL; + struct drm_gem_object **gobj_read = NULL; + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_fence *userq_fence; + struct drm_syncobj **syncobj = NULL; + u32 *bo_handles_write, num_write_bo_handles; + u32 *syncobj_handles, num_syncobj_handles; + u32 *bo_handles_read, num_read_bo_handles; + int r, i, entry, rentry, wentry; + struct dma_fence *fence; + struct drm_exec exec; + u64 wptr; + + num_syncobj_handles = args->num_syncobj_handles; + syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles), + sizeof(u32) * 
num_syncobj_handles); + if (IS_ERR(syncobj_handles)) + return PTR_ERR(syncobj_handles); + + /* Array of pointers to the looked up syncobjs */ + syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL); + if (!syncobj) { + r = -ENOMEM; + goto free_syncobj_handles; + } + + for (entry = 0; entry < num_syncobj_handles; entry++) { + syncobj[entry] = drm_syncobj_find(filp, syncobj_handles[entry]); + if (!syncobj[entry]) { + r = -ENOENT; + goto free_syncobj; + } + } + + num_read_bo_handles = args->num_bo_read_handles; + bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles), + sizeof(u32) * num_read_bo_handles); + if (IS_ERR(bo_handles_read)) { + r = PTR_ERR(bo_handles_read); + goto free_syncobj; + } + + /* Array of pointers to the GEM read objects */ + gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL); + if (!gobj_read) { + r = -ENOMEM; + goto free_bo_handles_read; + } + + for (rentry = 0; rentry < num_read_bo_handles; rentry++) { + gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]); + if (!gobj_read[rentry]) { + r = -ENOENT; + goto put_gobj_read; + } + } + + num_write_bo_handles = args->num_bo_write_handles; + bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles), + sizeof(u32) * num_write_bo_handles); + if (IS_ERR(bo_handles_write)) { + r = PTR_ERR(bo_handles_write); + goto put_gobj_read; + } + + /* Array of pointers to the GEM write objects */ + gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL); + if (!gobj_write) { + r = -ENOMEM; + goto free_bo_handles_write; + } + + for (wentry = 0; wentry < num_write_bo_handles; wentry++) { + gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]); + if (!gobj_write[wentry]) { + r = -ENOENT; + goto put_gobj_write; + } + } + + /* Retrieve the user queue */ + queue = idr_find(&userq_mgr->userq_idr, args->queue_id); + if (!queue) { + r = -ENOENT; + goto put_gobj_write; + } + + r = amdgpu_userq_fence_read_wptr(queue, &wptr); + if (r) + goto put_gobj_write; + + r = amdgpu_userq_fence_alloc(&userq_fence); + if (r) + goto put_gobj_write; + + /* We are here means UQ is active, make sure the eviction fence is valid */ + amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); + + /* Create a new fence */ + r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence); + if (r) { + mutex_unlock(&userq_mgr->userq_mutex); + kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); + goto put_gobj_write; + } + + dma_fence_put(queue->last_fence); + queue->last_fence = dma_fence_get(fence); + mutex_unlock(&userq_mgr->userq_mutex); + + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, + (num_read_bo_handles + num_write_bo_handles)); + + /* Lock all BOs with retry handling */ + drm_exec_until_all_locked(&exec) { + r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); + drm_exec_retry_on_contention(&exec); + if (r) { + amdgpu_userq_fence_cleanup(fence); + goto exec_fini; + } + + r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); + drm_exec_retry_on_contention(&exec); + if (r) { + amdgpu_userq_fence_cleanup(fence); + goto exec_fini; + } + } + + for (i = 0; i < num_read_bo_handles; i++) { + if (!gobj_read || !gobj_read[i]->resv) + continue; + + dma_resv_add_fence(gobj_read[i]->resv, fence, + DMA_RESV_USAGE_READ); + } + + for (i = 0; i < num_write_bo_handles; i++) { + if (!gobj_write || !gobj_write[i]->resv) + continue; + + dma_resv_add_fence(gobj_write[i]->resv, fence, + 
DMA_RESV_USAGE_WRITE); + } + + /* Add the created fence to syncobj/BO's */ + for (i = 0; i < num_syncobj_handles; i++) + drm_syncobj_replace_fence(syncobj[i], fence); + + /* drop the reference acquired in fence creation function */ + dma_fence_put(fence); + +exec_fini: + drm_exec_fini(&exec); +put_gobj_write: + while (wentry-- > 0) + drm_gem_object_put(gobj_write[wentry]); + kfree(gobj_write); +free_bo_handles_write: + kfree(bo_handles_write); +put_gobj_read: + while (rentry-- > 0) + drm_gem_object_put(gobj_read[rentry]); + kfree(gobj_read); +free_bo_handles_read: + kfree(bo_handles_read); +free_syncobj: + while (entry-- > 0) + if (syncobj[entry]) + drm_syncobj_put(syncobj[entry]); + kfree(syncobj); +free_syncobj_handles: + kfree(syncobj_handles); + + return r; +} + +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write; + u32 num_syncobj, num_read_bo_handles, num_write_bo_handles; + struct drm_amdgpu_userq_fence_info *fence_info = NULL; + struct drm_amdgpu_userq_wait *wait_info = data; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; + struct amdgpu_usermode_queue *waitq; + struct drm_gem_object **gobj_write; + struct drm_gem_object **gobj_read; + struct dma_fence **fences = NULL; + u16 num_points, num_fences = 0; + int r, i, rentry, wentry, cnt; + struct drm_exec exec; + + num_read_bo_handles = wait_info->num_bo_read_handles; + bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles), + sizeof(u32) * num_read_bo_handles); + if (IS_ERR(bo_handles_read)) + return PTR_ERR(bo_handles_read); + + num_write_bo_handles = wait_info->num_bo_write_handles; + bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles), + sizeof(u32) * num_write_bo_handles); + if (IS_ERR(bo_handles_write)) { + r = PTR_ERR(bo_handles_write); + goto free_bo_handles_read; + } + + num_syncobj = wait_info->num_syncobj_handles; + syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles), + sizeof(u32) * num_syncobj); + if (IS_ERR(syncobj_handles)) { + r = PTR_ERR(syncobj_handles); + goto free_bo_handles_write; + } + + num_points = wait_info->num_syncobj_timeline_handles; + timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles), + sizeof(u32) * num_points); + if (IS_ERR(timeline_handles)) { + r = PTR_ERR(timeline_handles); + goto free_syncobj_handles; + } + + timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points), + sizeof(u32) * num_points); + if (IS_ERR(timeline_points)) { + r = PTR_ERR(timeline_points); + goto free_timeline_handles; + } + + gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL); + if (!gobj_read) { + r = -ENOMEM; + goto free_timeline_points; + } + + for (rentry = 0; rentry < num_read_bo_handles; rentry++) { + gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]); + if (!gobj_read[rentry]) { + r = -ENOENT; + goto put_gobj_read; + } + } + + gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL); + if (!gobj_write) { + r = -ENOMEM; + goto put_gobj_read; + } + + for (wentry = 0; wentry < num_write_bo_handles; wentry++) { + gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]); + if (!gobj_write[wentry]) { + r = -ENOENT; + goto put_gobj_write; + } + } + + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, + (num_read_bo_handles 
+ num_write_bo_handles)); + + /* Lock all BOs with retry handling */ + drm_exec_until_all_locked(&exec) { + r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); + drm_exec_retry_on_contention(&exec); + if (r) { + drm_exec_fini(&exec); + goto put_gobj_write; + } + + r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); + drm_exec_retry_on_contention(&exec); + if (r) { + drm_exec_fini(&exec); + goto put_gobj_write; + } + } + + if (!wait_info->num_fences) { + if (num_points) { + struct dma_fence_unwrap iter; + struct dma_fence *fence; + struct dma_fence *f; + + for (i = 0; i < num_points; i++) { + r = drm_syncobj_find_fence(filp, timeline_handles[i], + timeline_points[i], + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); + if (r) + goto exec_fini; + + dma_fence_unwrap_for_each(f, &iter, fence) + num_fences++; + + dma_fence_put(fence); + } + } + + /* Count syncobj's fence */ + for (i = 0; i < num_syncobj; i++) { + struct dma_fence *fence; + + r = drm_syncobj_find_fence(filp, syncobj_handles[i], + 0, + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); + if (r) + goto exec_fini; + + num_fences++; + dma_fence_put(fence); + } + + /* Count GEM objects fence */ + for (i = 0; i < num_read_bo_handles; i++) { + struct dma_resv_iter resv_cursor; + struct dma_fence *fence; + + dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, + DMA_RESV_USAGE_READ, fence) + num_fences++; + } + + for (i = 0; i < num_write_bo_handles; i++) { + struct dma_resv_iter resv_cursor; + struct dma_fence *fence; + + dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, + DMA_RESV_USAGE_WRITE, fence) + num_fences++; + } + + /* + * Passing num_fences = 0 means that userspace doesn't want to + * retrieve userq_fence_info. If num_fences = 0 we skip filling + * userq_fence_info and return the actual number of fences on + * args->num_fences. 
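+	 * A caller can therefore use the ioctl in two passes: first with
+	 * num_fences = 0 to query how many fences are outstanding, then
+	 * again with an out_fences buffer sized for that count.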
+ */ + wait_info->num_fences = num_fences; + } else { + /* Array of fence info */ + fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL); + if (!fence_info) { + r = -ENOMEM; + goto exec_fini; + } + + /* Array of fences */ + fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL); + if (!fences) { + r = -ENOMEM; + goto free_fence_info; + } + + /* Retrieve GEM read objects fence */ + for (i = 0; i < num_read_bo_handles; i++) { + struct dma_resv_iter resv_cursor; + struct dma_fence *fence; + + dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, + DMA_RESV_USAGE_READ, fence) { + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + r = -EINVAL; + goto free_fences; + } + + fences[num_fences++] = fence; + dma_fence_get(fence); + } + } + + /* Retrieve GEM write objects fence */ + for (i = 0; i < num_write_bo_handles; i++) { + struct dma_resv_iter resv_cursor; + struct dma_fence *fence; + + dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, + DMA_RESV_USAGE_WRITE, fence) { + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + r = -EINVAL; + goto free_fences; + } + + fences[num_fences++] = fence; + dma_fence_get(fence); + } + } + + if (num_points) { + struct dma_fence_unwrap iter; + struct dma_fence *fence; + struct dma_fence *f; + + for (i = 0; i < num_points; i++) { + r = drm_syncobj_find_fence(filp, timeline_handles[i], + timeline_points[i], + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); + if (r) + goto free_fences; + + dma_fence_unwrap_for_each(f, &iter, fence) { + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + r = -EINVAL; + goto free_fences; + } + + dma_fence_get(f); + fences[num_fences++] = f; + } + + dma_fence_put(fence); + } + } + + /* Retrieve syncobj's fence */ + for (i = 0; i < num_syncobj; i++) { + struct dma_fence *fence; + + r = drm_syncobj_find_fence(filp, syncobj_handles[i], + 0, + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); + if (r) + goto free_fences; + + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + r = -EINVAL; + goto free_fences; + } + + fences[num_fences++] = fence; + } + + /* + * Keep only the latest fences to reduce the number of values + * given back to userspace. + */ + num_fences = dma_fence_dedup_array(fences, num_fences); + + waitq = idr_find(&userq_mgr->userq_idr, wait_info->waitq_id); + if (!waitq) + goto free_fences; + + for (i = 0, cnt = 0; i < num_fences; i++) { + struct amdgpu_userq_fence_driver *fence_drv; + struct amdgpu_userq_fence *userq_fence; + u32 index; + + userq_fence = to_amdgpu_userq_fence(fences[i]); + if (!userq_fence) { + /* + * Just waiting on other driver fences should + * be good for now + */ + r = dma_fence_wait(fences[i], true); + if (r) { + dma_fence_put(fences[i]); + goto free_fences; + } + + dma_fence_put(fences[i]); + continue; + } + + fence_drv = userq_fence->fence_drv; + /* + * We need to make sure the user queue release their reference + * to the fence drivers at some point before queue destruction. + * Otherwise, we would gather those references until we don't + * have any more space left and crash. 
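+	 * The reference taken just below is therefore recorded in
+	 * waitq->fence_drv_xa, so the queue teardown path can walk the
+	 * xarray and drop each stored fence driver reference.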
+ */ + r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv, + xa_limit_32b, GFP_KERNEL); + if (r) + goto free_fences; + + amdgpu_userq_fence_driver_get(fence_drv); + + /* Store drm syncobj's gpu va address and value */ + fence_info[cnt].va = fence_drv->va; + fence_info[cnt].value = fences[i]->seqno; + + dma_fence_put(fences[i]); + /* Increment the actual userq fence count */ + cnt++; + } + + wait_info->num_fences = cnt; + /* Copy userq fence info to user space */ + if (copy_to_user(u64_to_user_ptr(wait_info->out_fences), + fence_info, wait_info->num_fences * sizeof(*fence_info))) { + r = -EFAULT; + goto free_fences; + } + + kfree(fences); + kfree(fence_info); + } + + drm_exec_fini(&exec); + for (i = 0; i < num_read_bo_handles; i++) + drm_gem_object_put(gobj_read[i]); + kfree(gobj_read); + + for (i = 0; i < num_write_bo_handles; i++) + drm_gem_object_put(gobj_write[i]); + kfree(gobj_write); + + kfree(timeline_points); + kfree(timeline_handles); + kfree(syncobj_handles); + kfree(bo_handles_write); + kfree(bo_handles_read); + + return 0; + +free_fences: + while (num_fences-- > 0) + dma_fence_put(fences[num_fences]); + kfree(fences); +free_fence_info: + kfree(fence_info); +exec_fini: + drm_exec_fini(&exec); +put_gobj_write: + while (wentry-- > 0) + drm_gem_object_put(gobj_write[wentry]); + kfree(gobj_write); +put_gobj_read: + while (rentry-- > 0) + drm_gem_object_put(gobj_read[rentry]); + kfree(gobj_read); +free_timeline_points: + kfree(timeline_points); +free_timeline_handles: + kfree(timeline_handles); +free_syncobj_handles: + kfree(syncobj_handles); +free_bo_handles_write: + kfree(bo_handles_write); +free_bo_handles_read: + kfree(bo_handles_read); + + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h new file mode 100644 index 000000000000..97a125ab8a78 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_USERQ_FENCE_H__ +#define __AMDGPU_USERQ_FENCE_H__ + +#include <linux/types.h> + +#include "amdgpu_userq.h" + +struct amdgpu_userq_fence { + struct dma_fence base; + /* + * This lock is necessary to synchronize the + * userqueue dma fence operations. 
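+	 * It is a per-fence lock; the fence driver's own list of fences
+	 * is protected separately by fence_list_lock in
+	 * struct amdgpu_userq_fence_driver below.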
+ */ + spinlock_t lock; + struct list_head link; + unsigned long fence_drv_array_count; + struct amdgpu_userq_fence_driver *fence_drv; + struct amdgpu_userq_fence_driver **fence_drv_array; +}; + +struct amdgpu_userq_fence_driver { + struct kref refcount; + u64 va; + u64 gpu_addr; + u64 *cpu_addr; + u64 context; + /* + * This lock is necesaary to synchronize the access + * to the fences list by the fence driver. + */ + spinlock_t fence_list_lock; + struct list_head fences; + struct amdgpu_device *adev; + char timeline_name[TASK_COMM_LEN]; +}; + +int amdgpu_userq_fence_slab_init(void); +void amdgpu_userq_fence_slab_fini(void); + +void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv); +void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv); +int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, + struct amdgpu_usermode_queue *userq); +void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq); +void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv); +void amdgpu_userq_fence_driver_destroy(struct kref *ref); +int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 1991dd3d1056..c8885c3d54b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -353,9 +353,9 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i) cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work); - /* err_event_athub will corrupt VCPU buffer, so we need to + /* err_event_athub and dpc recovery will corrupt VCPU buffer, so we need to * restore fw data and clear buffer in amdgpu_vcn_resume() */ - if (in_ras_intr) + if (in_ras_intr || adev->pcie_reset_ctx.in_link_reset) return 0; return amdgpu_vcn_save_vcpu_bo_inst(adev, i); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index cdcdae7f71ce..83adf81defc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -66,7 +66,6 @@ #define VCN_ENC_CMD_REG_WAIT 0x0000000c #define VCN_AON_SOC_ADDRESS_2_0 0x1f800 -#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define VCN_VID_IP_ADDRESS_2_0 0x0 #define VCN_AON_IP_ADDRESS_2_0 0x30000 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 0bb8cbe0dcc0..13f0cdeb59c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -1323,6 +1323,9 @@ static int amdgpu_virt_req_ras_err_count_internal(struct amdgpu_device *adev, bo { struct amdgpu_virt *virt = &adev->virt; + if (!virt->ops || !virt->ops->req_ras_err_count) + return -EOPNOTSUPP; + /* Host allows 15 ras telemetry requests per 60 seconds. Afterwhich, the Host * will ignore incoming guest messages. Ratelimit the guest messages to * prevent guest self DOS. 
@@ -1378,14 +1381,16 @@ amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev, used_size = host_telemetry->header.used_size; if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10)) - return 0; + return -EINVAL; cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL); if (!cper_dump) return -ENOMEM; - if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0)) + if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0)) { + ret = -EINVAL; goto out; + } *more = cper_dump->more; @@ -1425,7 +1430,7 @@ static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev) int ret = 0; uint32_t more = 0; - if (!amdgpu_sriov_ras_cper_en(adev)) + if (!virt->ops || !virt->ops->req_ras_cper_dump) return -EOPNOTSUPP; do { @@ -1434,7 +1439,7 @@ static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev) adev, virt->fw_reserve.ras_telemetry, &more); else ret = 0; - } while (more); + } while (more && !ret); return ret; } @@ -1444,6 +1449,9 @@ int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update) struct amdgpu_virt *virt = &adev->virt; int ret = 0; + if (!amdgpu_sriov_ras_cper_en(adev)) + return -EOPNOTSUPP; + if ((__ratelimit(&virt->ras.ras_cper_dump_rs) || force_update) && down_read_trylock(&adev->reset_domain->sem)) { mutex_lock(&virt->ras.ras_telemetry_mutex); @@ -1480,3 +1488,16 @@ bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev, return true; } + +/* + * amdgpu_virt_request_bad_pages() - request bad pages + * @adev: amdgpu device. + * Send command to GPU hypervisor to write new bad pages into the shared PF2VF region + */ +void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev) +{ + struct amdgpu_virt *virt = &adev->virt; + + if (virt->ops && virt->ops->req_bad_pages) + virt->ops->req_bad_pages(adev); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index df03dba67ab8..577c6194db78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -97,6 +97,7 @@ struct amdgpu_virt_ops { bool (*rcvd_ras_intr)(struct amdgpu_device *adev); int (*req_ras_err_count)(struct amdgpu_device *adev); int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr); + int (*req_bad_pages)(struct amdgpu_device *adev); }; /* @@ -146,11 +147,13 @@ enum AMDGIM_FEATURE_FLAG { enum AMDGIM_REG_ACCESS_FLAG { /* Use PSP to program IH_RB_CNTL */ - AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0), + AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0), /* Use RLC to program MMHUB regs */ - AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1), + AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1), /* Use RLC to program GC regs */ - AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2), + AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2), + /* Use PSP to program L1_TLB_CNTL*/ + AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN = (1 << 3), }; struct amdgim_pf2vf_info_v1 { @@ -260,7 +263,10 @@ struct amdgpu_virt { uint32_t reg_val_offs; struct amdgpu_irq_src ack_irq; struct amdgpu_irq_src rcv_irq; + struct work_struct flr_work; + struct work_struct bad_pages_work; + struct amdgpu_mm_table mm_table; const struct amdgpu_virt_ops *ops; struct amdgpu_vf_error_buffer vf_errors; @@ -330,6 +336,10 @@ struct amdgpu_video_codec_info; (amdgpu_sriov_vf((adev)) && \ ((adev)->virt.reg_access & (AMDGIM_FEATURE_GC_REG_RLC_EN))) +#define amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev) \ +(amdgpu_sriov_vf((adev)) && \ + ((adev)->virt.reg_access & (AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN))) + #define 
amdgpu_sriov_rlcg_error_report_enabled(adev) \ (amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev)) @@ -423,4 +433,5 @@ int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update) int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev); bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev, enum amdgpu_ras_block block); +void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ce52b4d75e94..3911c78f8282 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -787,7 +787,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; - cleaner_shader_needed = adev->gfx.enable_cleaner_shader && + cleaner_shader_needed = job->run_cleaner_shader && + adev->gfx.enable_cleaner_shader && ring->funcs->emit_cleaner_shader && job->base.s_fence && &job->base.s_fence->scheduled == isolation->spearhead; @@ -817,7 +818,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid) adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid); - if (!ring->is_mes_queue && ring->funcs->emit_gds_switch && + if (ring->funcs->emit_gds_switch && gds_switch_needed) { amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, job->gds_size, job->gws_base, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 477424472bbe..f51ef4cf16e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -296,15 +296,27 @@ static const struct amdgpu_pcs_ras_field xgmi3x16_pcs_ras_fields[] = { static u32 xgmi_v6_4_get_link_status(struct amdgpu_device *adev, int global_link_num) { - const u32 smnpcs_xgmi3x16_pcs_state_hist1 = 0x11a00070; - const int xgmi_inst = 2; - u32 link_inst; + const u32 smn_xgmi_6_4_pcs_state_hist1[2] = { 0x11a00070, 0x11b00070 }; + const u32 smn_xgmi_6_4_1_pcs_state_hist1[2] = { 0x12100070, + 0x11b00070 }; + u32 i, n; u64 addr; - link_inst = global_link_num % xgmi_inst; + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + n = ARRAY_SIZE(smn_xgmi_6_4_pcs_state_hist1); + addr = smn_xgmi_6_4_pcs_state_hist1[global_link_num % n]; + break; + case IP_VERSION(6, 4, 1): + n = ARRAY_SIZE(smn_xgmi_6_4_1_pcs_state_hist1); + addr = smn_xgmi_6_4_1_pcs_state_hist1[global_link_num % n]; + break; + default: + return U32_MAX; + } - addr = (smnpcs_xgmi3x16_pcs_state_hist1 | (link_inst << 20)) + - adev->asic_funcs->encode_ext_smn_addressing(global_link_num / xgmi_inst); + i = global_link_num / n; + addr += adev->asic_funcs->encode_ext_smn_addressing(i); return RREG32_PCIE_EXT(addr); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index d6ac2652f0ac..92ca13097aaa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -109,10 +109,11 @@ union amd_sriov_msg_feature_flags { union amd_sriov_reg_access_flags { struct { - uint32_t vf_reg_access_ih : 1; - uint32_t vf_reg_access_mmhub : 1; - uint32_t vf_reg_access_gc : 1; - uint32_t reserved : 29; + uint32_t vf_reg_access_ih : 1; + uint32_t vf_reg_access_mmhub : 1; + uint32_t vf_reg_access_gc : 1; + uint32_t vf_reg_access_l1_tlb_cntl : 1; + uint32_t 
reserved : 28; } flags; uint32_t all; }; @@ -330,6 +331,7 @@ enum amd_sriov_mailbox_request_message { MB_REQ_MSG_RAS_POISON = 202, MB_REQ_RAS_ERROR_COUNT = 203, MB_REQ_RAS_CPER_DUMP = 204, + MB_REQ_RAS_BAD_PAGES = 205, }; /* mailbox message send from host to guest */ @@ -347,6 +349,9 @@ enum amd_sriov_mailbox_response_message { MB_RES_MSG_GPU_RMA = 10, MB_RES_MSG_RAS_ERROR_COUNT_READY = 11, MB_REQ_RAS_CPER_DUMP_READY = 14, + MB_RES_MSG_RAS_BAD_PAGES_READY = 15, + MB_RES_MSG_RAS_BAD_PAGES_NOTIFICATION = 16, + MB_RES_MSG_UNRECOV_ERR_NOTIFICATION = 17, MB_RES_MSG_TEXT_MESSAGE = 255 }; diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 81d195d366ce..427b073de2fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -1444,6 +1444,7 @@ static void atom_get_vbios_pn(struct atom_context *ctx) if (vbios_str == NULL) vbios_str += sizeof(BIOS_ATOM_PREFIX) - 1; } + OPTIMIZER_HIDE_VAR(vbios_str); if (vbios_str != NULL && *vbios_str == 0) vbios_str++; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 508cea965983..9e8715b4739d 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -56,6 +56,8 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev); static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev); static int cik_sdma_soft_reset(struct amdgpu_ip_block *ip_block); +u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev); + MODULE_FIRMWARE("amdgpu/bonaire_sdma.bin"); MODULE_FIRMWARE("amdgpu/bonaire_sdma1.bin"); MODULE_FIRMWARE("amdgpu/hawaii_sdma.bin"); @@ -67,9 +69,6 @@ MODULE_FIRMWARE("amdgpu/kabini_sdma1.bin"); MODULE_FIRMWARE("amdgpu/mullins_sdma.bin"); MODULE_FIRMWARE("amdgpu/mullins_sdma1.bin"); -u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev); - - static void cik_sdma_free_microcode(struct amdgpu_device *adev) { int i; @@ -993,14 +992,9 @@ static int cik_sdma_sw_fini(struct amdgpu_ip_block *ip_block) static int cik_sdma_hw_init(struct amdgpu_ip_block *ip_block) { - int r; struct amdgpu_device *adev = ip_block->adev; - r = cik_sdma_start(adev); - if (r) - return r; - - return r; + return cik_sdma_start(adev); } static int cik_sdma_hw_fini(struct amdgpu_ip_block *ip_block) @@ -1040,14 +1034,10 @@ static bool cik_sdma_is_idle(struct amdgpu_ip_block *ip_block) static int cik_sdma_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; - u32 tmp; struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK | - SRBM_STATUS2__SDMA1_BUSY_MASK); - - if (!tmp) + if (cik_sdma_is_idle(ip_block)) return 0; udelay(1); } diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 279288365940..8aca4f2734f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -60,9 +60,6 @@ #define AUD5_REGISTER_OFFSET (0x179d - 0x1780) #define AUD6_REGISTER_OFFSET (0x17a4 - 0x1780) -#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001 -#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003 - #define PIPEID(x) ((x) << 0) #define MEID(x) ((x) << 2) #define VMID(x) ((x) << 4) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index df401aded662..bf7c22f81cda 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -3075,7 +3075,7 @@ static int dce_v10_0_set_hpd_irq_state(struct 
amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return 0; } @@ -3227,7 +3227,7 @@ static void dce_v10_0_hpd_int_ack(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 80f01c3989cd..47e05783c4a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -3206,7 +3206,7 @@ static int dce_v11_0_set_hpd_irq_state(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return 0; } @@ -3358,7 +3358,7 @@ static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return; } @@ -3488,8 +3488,7 @@ static const struct amd_ip_funcs dce_v11_0_ip_funcs = { .set_powergating_state = dce_v11_0_set_powergating_state, }; -static void -dce_v11_0_encoder_mode_set(struct drm_encoder *encoder, +static void dce_v11_0_encoder_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 255c70959343..276c025c4c03 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -287,7 +287,7 @@ static void dce_v6_0_hpd_int_ack(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return; } @@ -412,7 +412,7 @@ static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev, { if (!render) WREG32(mmVGA_RENDER_CONTROL, - RREG32(mmVGA_RENDER_CONTROL) & VGA_VSTATUS_CNTL); + RREG32(mmVGA_RENDER_CONTROL) & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK); } static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev) @@ -1011,16 +1011,16 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, /* select wm A */ arb_control3 = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset); tmp = arb_control3; - tmp &= ~LATENCY_WATERMARK_MASK(3); - tmp |= LATENCY_WATERMARK_MASK(1); + tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT); + tmp |= (1 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT); WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp); WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, ((latency_watermark_a << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) | (line_time << DPG_PIPE_URGENCY_CONTROL__URGENCY_HIGH_WATERMARK__SHIFT))); /* select wm B */ tmp = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset); - tmp &= ~LATENCY_WATERMARK_MASK(3); - tmp |= LATENCY_WATERMARK_MASK(2); + tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT); + tmp |= (2 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT); WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp); WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, ((latency_watermark_b << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) | @@ -1089,7 +1089,7 @@ static u32 dce_v6_0_line_buffer_adjust(struct amdgpu_device *adev, } WREG32(mmDC_LB_MEMORY_SPLIT + 
amdgpu_crtc->crtc_offset, - DC_LB_MEMORY_CONFIG(tmp)); + (tmp << DC_LB_MEMORY_SPLIT__DC_LB_MEMORY_CONFIG__SHIFT)); WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset, (buffer_alloc << PIPE0_DMIF_BUFFER_CONTROL__DMIF_BUFFERS_ALLOCATED__SHIFT)); @@ -1306,6 +1306,7 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 offset; struct drm_connector *connector; struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; @@ -1327,6 +1328,11 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, }; + if (!dig || !dig->afmt || !dig->afmt->pin) + return; + + offset = dig->afmt->pin->offset; + drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { @@ -1348,7 +1354,7 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) return; for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { - u32 tmp = 0; + u32 value = 0; u8 stereo_freqs = 0; int max_channels = -1; int j; @@ -1358,12 +1364,12 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) if (sad->format == eld_reg_to_type[i][1]) { if (sad->channels > max_channels) { - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - MAX_CHANNELS, sad->channels); - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - DESCRIPTOR_BYTE_2, sad->byte2); - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - SUPPORTED_FREQUENCIES, sad->freq); + value = (sad->channels << + AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__MAX_CHANNELS__SHIFT) | + (sad->byte2 << + AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__DESCRIPTOR_BYTE_2__SHIFT) | + (sad->freq << + AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES__SHIFT); max_channels = sad->channels; } @@ -1374,13 +1380,13 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) } } - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - SUPPORTED_FREQUENCIES_STEREO, stereo_freqs); - WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp); + value |= (stereo_freqs << + AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES_STEREO__SHIFT); + + WREG32_AUDIO_ENDPT(offset, eld_reg_to_type[i][0], value); } kfree(sads); - } static void dce_v6_0_audio_enable(struct amdgpu_device *adev, @@ -1886,7 +1892,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, struct amdgpu_bo *abo; uint64_t fb_location, tiling_flags; uint32_t fb_format, fb_pitch_pixels, pipe_config; - u32 fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_NONE); + u32 fb_swap = (GRPH_ENDIAN_NONE << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); u32 viewport_w, viewport_h; int r; bool bypass_lut = false; @@ -1926,76 +1932,76 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, switch (target_fb->format->format) { case DRM_FORMAT_C8: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_8BPP) | - GRPH_FORMAT(GRPH_FORMAT_INDEXED)); + fb_format = ((GRPH_DEPTH_8BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_INDEXED << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); break; case DRM_FORMAT_XRGB4444: case DRM_FORMAT_ARGB4444: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB4444)); + fb_format 
= ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB4444 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16); + fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_XRGB1555: case DRM_FORMAT_ARGB1555: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB1555)); + fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB1555 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16); + fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_BGRX5551: case DRM_FORMAT_BGRA5551: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) | - GRPH_FORMAT(GRPH_FORMAT_BGRA5551)); + fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_BGRA5551 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16); + fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_RGB565: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB565)); + fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB565 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16); + fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_XRGB8888: case DRM_FORMAT_ARGB8888: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB8888)); + fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32); + fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_XRGB2101010: case DRM_FORMAT_ARGB2101010: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB2101010)); + fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB2101010 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32); + fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */ bypass_lut = true; break; case DRM_FORMAT_BGRX1010102: case DRM_FORMAT_BGRA1010102: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) | - GRPH_FORMAT(GRPH_FORMAT_BGRA1010102)); + fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_BGRA1010102 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32); + fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */ bypass_lut = true; break; case DRM_FORMAT_XBGR8888: case DRM_FORMAT_ABGR8888: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB8888)); - fb_swap = (GRPH_RED_CROSSBAR(GRPH_RED_SEL_B) | - GRPH_BLUE_CROSSBAR(GRPH_BLUE_SEL_R)); + fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); + fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) | + (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap |= 
GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32); + fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; default: @@ -2013,18 +2019,18 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT); num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS); - fb_format |= GRPH_NUM_BANKS(num_banks); - fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_2D_TILED_THIN1); - fb_format |= GRPH_TILE_SPLIT(tile_split); - fb_format |= GRPH_BANK_WIDTH(bankw); - fb_format |= GRPH_BANK_HEIGHT(bankh); - fb_format |= GRPH_MACRO_TILE_ASPECT(mtaspect); + fb_format |= (num_banks << GRPH_CONTROL__GRPH_NUM_BANKS__SHIFT); + fb_format |= (GRPH_ARRAY_2D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT); + fb_format |= (tile_split << GRPH_CONTROL__GRPH_TILE_SPLIT__SHIFT); + fb_format |= (bankw << GRPH_CONTROL__GRPH_BANK_WIDTH__SHIFT); + fb_format |= (bankh << GRPH_CONTROL__GRPH_BANK_HEIGHT__SHIFT); + fb_format |= (mtaspect << GRPH_CONTROL__GRPH_MACRO_TILE_ASPECT__SHIFT); } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) { - fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_1D_TILED_THIN1); + fb_format |= (GRPH_ARRAY_1D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT); } pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG); - fb_format |= GRPH_PIPE_CONFIG(pipe_config); + fb_format |= (pipe_config << GRPH_CONTROL__GRPH_PIPE_CONFIG__SHIFT); dce_v6_0_vga_enable(crtc, false); @@ -2040,7 +2046,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, (u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK); WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, - (u32) fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK); + (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK); WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap); @@ -2108,14 +2114,13 @@ static void dce_v6_0_set_interleave(struct drm_crtc *crtc, if (mode->flags & DRM_MODE_FLAG_INTERLACE) WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset, - INTERLEAVE_EN); + DATA_FORMAT__INTERLEAVE_EN_MASK); else WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset, 0); } static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = drm_to_adev(dev); @@ -2125,15 +2130,15 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc) DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id); WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) | - (0 << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT))); + ((INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) | + (INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT))); WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset, PRESCALE_GRPH_CONTROL__GRPH_PRESCALE_BYPASS_MASK); WREG32(mmPRESCALE_OVL_CONTROL + amdgpu_crtc->crtc_offset, PRESCALE_OVL_CONTROL__OVL_PRESCALE_BYPASS_MASK); WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) | - (0 << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT))); + ((INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) | + (INPUT_GAMMA_USE_LUT << 
INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT))); WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0); @@ -2160,19 +2165,19 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc) } WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) | - (0 << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) | - ICON_DEGAMMA_MODE(0) | - (0 << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT))); + ((DEGAMMA_BYPASS << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) | + (DEGAMMA_BYPASS << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) | + (DEGAMMA_BYPASS << DEGAMMA_CONTROL__ICON_DEGAMMA_MODE__SHIFT) | + (DEGAMMA_BYPASS << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT))); WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) | - (0 << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT))); + ((GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) | + (GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT))); WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) | - (0 << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT))); + ((REGAMMA_BYPASS << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) | + (REGAMMA_BYPASS << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT))); WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) | - (0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT))); + ((OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) | + (OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT))); /* XXX match this to the depth of the crtc fmt block, move to modeset? */ WREG32(0x1a50 + amdgpu_crtc->crtc_offset, 0); @@ -2267,8 +2272,6 @@ static void dce_v6_0_hide_cursor(struct drm_crtc *crtc) WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) | (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT)); - - } static void dce_v6_0_show_cursor(struct drm_crtc *crtc) @@ -2285,7 +2288,6 @@ static void dce_v6_0_show_cursor(struct drm_crtc *crtc) CUR_CONTROL__CURSOR_EN_MASK | (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) | (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT)); - } static int dce_v6_0_cursor_move_locked(struct drm_crtc *crtc, @@ -2596,7 +2598,6 @@ static bool dce_v6_0_crtc_mode_fixup(struct drm_crtc *crtc, const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct drm_encoder *encoder; @@ -2669,7 +2670,7 @@ static void dce_v6_0_panic_flush(struct drm_plane *plane) /* Disable DC tiling */ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset); - fb_format &= ~GRPH_ARRAY_MODE(0x7); + fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK; WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); } @@ -2745,7 +2746,6 @@ static int dce_v6_0_early_init(struct amdgpu_ip_block *ip_block) static int dce_v6_0_sw_init(struct amdgpu_ip_block *ip_block) { int r, i; - bool ret; struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->mode_info.num_crtc; i++) { @@ -2789,8 +2789,7 @@ static int dce_v6_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } - ret = amdgpu_atombios_get_connector_info_from_object_table(adev); - if (ret) + if (amdgpu_atombios_get_connector_info_from_object_table(adev)) 
amdgpu_display_print_display_setup(adev_to_drm(adev)); else return -EINVAL; @@ -2986,12 +2985,12 @@ static void dce_v6_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: interrupt_mask = RREG32(mmINT_MASK + reg_block); - interrupt_mask &= ~VBLANK_INT_MASK; + interrupt_mask &= ~INT_MASK__VBLANK_INT_MASK; WREG32(mmINT_MASK + reg_block, interrupt_mask); break; case AMDGPU_IRQ_STATE_ENABLE: interrupt_mask = RREG32(mmINT_MASK + reg_block); - interrupt_mask |= VBLANK_INT_MASK; + interrupt_mask |= INT_MASK__VBLANK_INT_MASK; WREG32(mmINT_MASK + reg_block, interrupt_mask); break; default: @@ -3006,28 +3005,28 @@ static void dce_v6_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev, } -static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev, +static int dce_v6_0_set_hpd_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, - unsigned type, + unsigned hpd, enum amdgpu_interrupt_state state) { u32 dc_hpd_int_cntl; - if (type >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", type); + if (hpd >= adev->mode_info.num_hpd) { + DRM_DEBUG("invalid hpd %d\n", hpd); return 0; } switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]); - dc_hpd_int_cntl &= ~DC_HPDx_INT_EN; - WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl); + dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); + dc_hpd_int_cntl &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK; + WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]); - dc_hpd_int_cntl |= DC_HPDx_INT_EN; - WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl); + dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); + dc_hpd_int_cntl |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK; + WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl); break; default: break; @@ -3036,7 +3035,7 @@ static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev, return 0; } -static int dce_v6_0_set_crtc_interrupt_state(struct amdgpu_device *adev, +static int dce_v6_0_set_crtc_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3096,7 +3095,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev, switch (entry->src_data[0]) { case 0: /* vblank */ if (disp_int & interrupt_status_offsets[crtc].vblank) - WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_ACK); + WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_STATUS__VBLANK_ACK_MASK); else DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); @@ -3107,7 +3106,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev, break; case 1: /* vline */ if (disp_int & interrupt_status_offsets[crtc].vline) - WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_ACK); + WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_STATUS__VLINE_ACK_MASK); else DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); @@ -3121,7 +3120,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev, return 0; } -static int dce_v6_0_set_pageflip_interrupt_state(struct amdgpu_device *adev, +static int dce_v6_0_set_pageflip_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3172,7 +3171,7 @@ static int dce_v6_0_pageflip_irq(struct amdgpu_device *adev, 
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); works = amdgpu_crtc->pflip_works; - if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){ + if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) { DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != " "AMDGPU_FLIP_SUBMITTED(%d)\n", amdgpu_crtc->pflip_status, @@ -3249,12 +3248,10 @@ static const struct amd_ip_funcs dce_v6_0_ip_funcs = { .set_powergating_state = dce_v6_0_set_powergating_state, }; -static void -dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, +static void dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); @@ -3274,7 +3271,6 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder) { - struct amdgpu_device *adev = drm_to_adev(encoder->dev); struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder); @@ -3314,7 +3310,6 @@ static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder) static void dce_v6_0_encoder_commit(struct drm_encoder *encoder) { - struct drm_device *dev = encoder->dev; struct amdgpu_device *adev = drm_to_adev(dev); @@ -3325,7 +3320,6 @@ static void dce_v6_0_encoder_commit(struct drm_encoder *encoder) static void dce_v6_0_encoder_disable(struct drm_encoder *encoder) { - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig; int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); @@ -3541,17 +3535,17 @@ static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev) } static const struct amdgpu_irq_src_funcs dce_v6_0_crtc_irq_funcs = { - .set = dce_v6_0_set_crtc_interrupt_state, + .set = dce_v6_0_set_crtc_irq_state, .process = dce_v6_0_crtc_irq, }; static const struct amdgpu_irq_src_funcs dce_v6_0_pageflip_irq_funcs = { - .set = dce_v6_0_set_pageflip_interrupt_state, + .set = dce_v6_0_set_pageflip_irq_state, .process = dce_v6_0_pageflip_irq, }; static const struct amdgpu_irq_src_funcs dce_v6_0_hpd_irq_funcs = { - .set = dce_v6_0_set_hpd_interrupt_state, + .set = dce_v6_0_set_hpd_irq_state, .process = dce_v6_0_hpd_irq, }; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 07358546581f..e62ccf9eb73d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -271,7 +271,7 @@ static void dce_v8_0_hpd_int_ack(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return; } @@ -3021,7 +3021,7 @@ static void dce_v8_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev, } } -static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev, +static int dce_v8_0_set_hpd_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3029,7 +3029,7 @@ static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev, u32 dc_hpd_int_cntl; if (type >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", type); + DRM_DEBUG("invalid hpd %d\n", type); return 0; } @@ -3051,7 +3051,7 @@ static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev, return 0; } -static int dce_v8_0_set_crtc_interrupt_state(struct amdgpu_device *adev, 
+static int dce_v8_0_set_crtc_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3136,7 +3136,7 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev, return 0; } -static int dce_v8_0_set_pageflip_interrupt_state(struct amdgpu_device *adev, +static int dce_v8_0_set_pageflip_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3547,17 +3547,17 @@ static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev) } static const struct amdgpu_irq_src_funcs dce_v8_0_crtc_irq_funcs = { - .set = dce_v8_0_set_crtc_interrupt_state, + .set = dce_v8_0_set_crtc_irq_state, .process = dce_v8_0_crtc_irq, }; static const struct amdgpu_irq_src_funcs dce_v8_0_pageflip_irq_funcs = { - .set = dce_v8_0_set_pageflip_interrupt_state, + .set = dce_v8_0_set_pageflip_irq_state, .process = dce_v8_0_pageflip_irq, }; static const struct amdgpu_irq_src_funcs dce_v8_0_hpd_irq_funcs = { - .set = dce_v8_0_set_hpd_interrupt_state, + .set = dce_v8_0_set_hpd_irq_state, .process = dce_v8_0_hpd_irq, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 23e6a05359c2..75ea071744eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -368,11 +368,6 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = { SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR), SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST), /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0), @@ -421,7 +416,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_10[] = { SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_OFFSET), SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_WG_STATE_OFFSET), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS) + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), }; static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = { @@ -448,7 +452,32 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = { SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR), SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR_HI), SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI) + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI), + /* gfx header registers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + 
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), }; static const struct soc15_reg_golden golden_settings_gc_10_1[] = { @@ -4296,9 +4325,7 @@ static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev) static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; int ctx_reg_offset; if (adev->gfx.rlc.cs_data == NULL) @@ -4306,39 +4333,15 @@ static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - - PACKET3_SET_CONTEXT_REG_START); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } - - ctx_reg_offset = - SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; + ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); buffer[count++] = cpu_to_le32(ctx_reg_offset); buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + amdgpu_gfx_csb_preamble_end(buffer, count); } static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev) @@ -4752,6 +4755,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) int i, j, k, r, ring_id = 0; int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); @@ -4763,7 +4767,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(10, 
1, 4): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 8; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; @@ -4778,7 +4782,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(10, 3, 7): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 2; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 2; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; @@ -4800,7 +4804,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex); if (adev->gfx.me_fw_version >= 101 && adev->gfx.pfp_fw_version >= 158 && - adev->gfx.mec_fw_version >= 152) { + adev->gfx.mec_fw_version >= 151) { adev->gfx.enable_cleaner_shader = true; r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); if (r) { @@ -4810,7 +4814,9 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) } break; case IP_VERSION(10, 3, 0): + case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 2): + case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex; @@ -4826,6 +4832,34 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) } } break; + case IP_VERSION(10, 3, 6): + adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 14 && + adev->gfx.pfp_fw_version >= 17 && + adev->gfx.mec_fw_version >= 24) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + case IP_VERSION(10, 3, 7): + adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 4 && + adev->gfx.pfp_fw_version >= 9 && + adev->gfx.mec_fw_version >= 12) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; default: adev->gfx.enable_cleaner_shader = false; break; @@ -4886,7 +4920,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) /* set up the gfx ring */ for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { + for (j = 0; j < num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) continue; @@ -9645,9 +9679,14 @@ static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printe for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); for (reg = 0; reg < reg_count; reg++) { - drm_printf(p, "%-50s \t 0x%08x\n", - gc_cp_reg_list_10[reg].reg_name, - adev->gfx.ip_dump_compute_queues[index + reg]); + if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) + drm_printf(p, "%-50s \t 0x%08x\n", + "mmCP_MEC_ME2_HEADER_DUMP", + adev->gfx.ip_dump_compute_queues[index + reg]); + else + drm_printf(p, "%-50s \t 0x%08x\n", + 
gc_cp_reg_list_10[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); } index += reg_count; } @@ -9708,9 +9747,13 @@ static void gfx_v10_ip_dump(struct amdgpu_ip_block *ip_block) nv_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); for (reg = 0; reg < reg_count; reg++) { - adev->gfx.ip_dump_compute_queues[index + reg] = - RREG32(SOC15_REG_ENTRY_OFFSET( - gc_cp_reg_list_10[reg])); + if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP)); + else + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_10[reg])); } index += reg_count; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 2a5c2a1ae3c7..afd6d59164bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -48,6 +48,8 @@ #include "gfx_v11_0_3.h" #include "nbio_v4_3.h" #include "mes_v11_0.h" +#include "mes_userqueue.h" +#include "amdgpu_userq_fence.h" #define GFX11_NUM_GFX_RINGS 1 #define GFX11_MEC_HPD_SIZE 2048 @@ -177,9 +179,13 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR), SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), @@ -230,7 +236,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET), - SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS) + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), }; static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = { @@ -259,7 +274,24 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, 
regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), }; static const struct soc15_reg_golden golden_settings_gc_11_0[] = { @@ -580,33 +612,18 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t padding, offset; - - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - padding = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding); - *cpu_ptr = cpu_to_le32(0xCAFEDEAD); - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) - return r; + r = amdgpu_device_wb_get(adev, &index); + if (r) + return r; - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); - cpu_ptr = &adev->wb.wb[index]; + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + cpu_ptr = &adev->wb.wb[index]; - r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err1; - } + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err1; } ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); @@ -633,12 +650,10 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err2: - if (!ring->is_mes_queue) - amdgpu_ib_free(&ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -833,9 +848,7 @@ static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev) static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; int ctx_reg_offset; if (adev->gfx.rlc.cs_data == NULL) @@ -843,39 +856,15 @@ static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - - PACKET3_SET_CONTEXT_REG_START); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - 
} - } - } + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); - ctx_reg_offset = - SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; + ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); buffer[count++] = cpu_to_le32(ctx_reg_offset); buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + amdgpu_gfx_csb_preamble_end(buffer, count); } static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev) @@ -1056,14 +1045,21 @@ static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev, #define MQD_FWWORKAREA_SIZE 484 #define MQD_FWWORKAREA_ALIGNMENT 256 -static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, +static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev, struct amdgpu_gfx_shadow_info *shadow_info) { - if (adev->gfx.cp_gfx_shadow) { - shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; - shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; - shadow_info->csa_size = MQD_FWWORKAREA_SIZE; - shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; + shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; + shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; + shadow_info->csa_size = MQD_FWWORKAREA_SIZE; + shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; +} + +static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, + struct amdgpu_gfx_shadow_info *shadow_info, + bool skip_check) +{ + if (adev->gfx.cp_gfx_shadow || skip_check) { + gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info); return 0; } else { memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info)); @@ -1136,6 +1132,10 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, ring->ring_obj = NULL; ring->use_doorbell = true; + if (adev->gfx.disable_kq) { + ring->no_scheduler = true; + ring->no_user_submission = true; + } if (!ring_id) ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; @@ -1568,24 +1568,18 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) { - int i, j, k, r, ring_id = 0; + int i, j, k, r, ring_id; int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): - adev->gfx.me.num_me = 1; - adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; - adev->gfx.mec.num_mec = 1; - adev->gfx.mec.num_pipe_per_mec = 4; - adev->gfx.mec.num_queue_per_pipe = 4; - break; - case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): @@ -1593,7 +1587,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 5, 3): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 2; adev->gfx.mec.num_mec = 1; 
adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; @@ -1612,6 +1606,35 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + if (!adev->gfx.disable_uq && + adev->gfx.me_fw_version >= 2390 && + adev->gfx.pfp_fw_version >= 2530 && + adev->gfx.mec_fw_version >= 2600 && + adev->mes.fw_version[0] >= 120) { + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + } + break; + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): + /* add firmware version checks here */ + if (0 && !adev->gfx.disable_uq) { + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + } + break; + default: + break; + } + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); if (adev->gfx.me_fw_version >= 2280 && @@ -1640,6 +1663,34 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) } } break; + case IP_VERSION(11, 5, 2): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 12 && + adev->gfx.pfp_fw_version >= 15 && + adev->gfx.mec_fw_version >= 15) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + case IP_VERSION(11, 5, 3): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 7 && + adev->gfx.pfp_fw_version >= 8 && + adev->gfx.mec_fw_version >= 8) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; default: adev->gfx.enable_cleaner_shader = false; break; @@ -1701,37 +1752,42 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } - /* set up the gfx ring */ - for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { - if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) - continue; - - r = gfx_v11_0_gfx_ring_init(adev, ring_id, - i, k, j); - if (r) - return r; - ring_id++; + if (adev->gfx.num_gfx_rings) { + ring_id = 0; + /* set up the gfx ring */ + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { + if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v11_0_gfx_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; + ring_id++; + } } } } - ring_id = 0; - /* set up the compute queues - allocate horizontally across pipes */ - for (i = 0; i < adev->gfx.mec.num_mec; ++i) { - for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { - for (k = 0; k < 
adev->gfx.mec.num_pipe_per_mec; k++) { - if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, - k, j)) - continue; + if (adev->gfx.num_compute_rings) { + ring_id = 0; + /* set up the compute queues - allocate horizontally across pipes */ + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, + k, j)) + continue; - r = gfx_v11_0_compute_ring_init(adev, ring_id, - i, k, j); - if (r) - return r; + r = gfx_v11_0_compute_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; - ring_id++; + ring_id++; + } } } } @@ -4061,6 +4117,8 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); #endif + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ @@ -4081,6 +4139,16 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, /* active the queue */ mqd->cp_gfx_hqd_active = 1; + /* set gfx UQ items */ + mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr); + mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); + mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr); + mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr); + mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); + mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); + return 0; } @@ -4205,6 +4273,8 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, prop->allow_tunneling); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); mqd->cp_hqd_pq_control = tmp; /* set the wb address whether it's enabled or not */ @@ -4256,6 +4326,10 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_active = prop->hqd_active; + /* set UQ fenceaddress */ + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); + return 0; } @@ -4509,11 +4583,23 @@ static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) return r; } - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - r = amdgpu_ring_test_helper(ring); - if (r) - return r; + if (adev->gfx.disable_kq) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + /* we don't want to set ring->ready */ + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + } + if (amdgpu_async_gfx_ring) + amdgpu_gfx_disable_kgq(adev, 0); + } else { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } } for (i = 0; i < adev->gfx.num_compute_rings; i++) { @@ -4722,6 +4808,49 @@ static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block) return r; } +static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev, + bool enable) +{ + unsigned int irq_type; + int m, p, r; + + if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) { + for (m = 0; m < adev->gfx.me.num_me; m++) { + for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) { + irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; + if (enable) + r = 
amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + } + + if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) { + for (m = 0; m < adev->gfx.mec.num_mec; ++m) { + for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) { + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + (m * adev->gfx.mec.num_pipe_per_mec) + + p; + if (enable) + r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + } + + return 0; +} + static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -4731,9 +4860,11 @@ static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); + gfx_v11_0_set_userq_eop_interrupts(adev, false); if (!adev->no_hw_access) { - if (amdgpu_async_gfx_ring) { + if (amdgpu_async_gfx_ring && + !adev->gfx.disable_kq) { if (amdgpu_gfx_disable_kgq(adev, 0)) DRM_ERROR("KGQ disable failed\n"); } @@ -5059,11 +5190,36 @@ static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = true; + break; + case 1: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = false; + break; + case 2: + adev->gfx.disable_kq = true; + adev->gfx.disable_uq = false; + break; + } + adev->gfx.funcs = &gfx_v11_0_gfx_funcs; - adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), - AMDGPU_MAX_COMPUTE_RINGS); + if (adev->gfx.disable_kq) { + /* We need one GFX ring temporarily to set up + * the clear state. + */ + adev->gfx.num_gfx_rings = 1; + adev->gfx.num_compute_rings = 0; + } else { + adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); + } gfx_v11_0_set_kiq_pm4_funcs(adev); gfx_v11_0_set_ring_funcs(adev); @@ -5094,6 +5250,11 @@ static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block) r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); if (r) return r; + + r = gfx_v11_0_set_userq_eop_interrupts(adev, true); + if (r) + return r; + return 0; } @@ -5691,10 +5852,6 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); } - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x400000; - amdgpu_ring_write(ring, header); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -5714,10 +5871,6 @@ static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x40000000; - /* Currently, there is a high possibility to get wave ID mismatch * between ME and GDS, leading to a hw deadlock, because ME generates * different wave IDs than the GDS expects. 
This situation happens @@ -5775,8 +5928,7 @@ static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); amdgpu_ring_write(ring, upper_32_bits(seq)); - amdgpu_ring_write(ring, ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); + amdgpu_ring_write(ring, 0); } static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -5804,10 +5956,7 @@ static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - if (ring->is_mes_queue) - gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); - else - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* compute doesn't have PFP */ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { @@ -6036,28 +6185,13 @@ static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) void *de_payload_cpu_addr; int cnt; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v10_gfx_meta_data, de_payload); - de_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gds_backup) + - offsetof(struct v10_gfx_meta_data, de_payload); - gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - offset = offsetof(struct v10_gfx_meta_data, de_payload); - de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; + offset = offsetof(struct v10_gfx_meta_data, de_payload); + de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + - AMDGPU_CSA_SIZE - adev->gds.gds_size, - PAGE_SIZE); - } + gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); @@ -6296,25 +6430,23 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - int i; + u32 doorbell_offset = entry->src_data[0]; u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring; - uint32_t mes_queue_id = entry->src_data[0]; + int i; DRM_DEBUG("IH: CP EOP\n"); - if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { - struct amdgpu_mes_queue *queue; - - mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; + if (adev->enable_mes && doorbell_offset) { + struct amdgpu_userq_fence_driver *fence_drv = NULL; + struct xarray *xa = &adev->userq_xa; + unsigned long flags; - spin_lock(&adev->mes.queue_id_lock); - queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); - if (queue) { - DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); - amdgpu_fence_process(queue->ring); - } - spin_unlock(&adev->mes.queue_id_lock); + xa_lock_irqsave(xa, flags); + fence_drv = xa_load(xa, doorbell_offset); + if (fence_drv) + amdgpu_userq_fence_driver_process(fence_drv); + xa_unlock_irqrestore(xa, flags); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; @@ -6481,27 +6613,29 @@ static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, pipe_id = (entry->ring_id & 
0x03) >> 0; queue_id = (entry->ring_id & 0x70) >> 4; - switch (me_id) { - case 0: - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - if (ring->me == me_id && ring->pipe == pipe_id && - ring->queue == queue_id) - drm_sched_fault(&ring->sched); - } - break; - case 1: - case 2: - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - if (ring->me == me_id && ring->pipe == pipe_id && - ring->queue == queue_id) - drm_sched_fault(&ring->sched); + if (!adev->gfx.disable_kq) { + switch (me_id) { + case 0: + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + default: + BUG(); + break; } - break; - default: - BUG(); - break; } } @@ -6609,6 +6743,69 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev) +{ + /* Disable the pipe reset until the CPFW fully supports it. */ + dev_warn_once(adev->dev, "The CPFW doesn't support pipe reset yet.\n"); + return false; +} + + +static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; + int r; + + if (!gfx_v11_pipe_reset_support(adev)) + return -EOPNOTSUPP; + + gfx_v11_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 0); + break; + default: + break; + } + + WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); + + r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) - + RS64_FW_UC_START_ADDR_LO; + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v11_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name, + r == 0 ? "successfully" : "failed"); + /* FIXME: Sometimes the driver can't cache the ME firmware start PC correctly, + * so the pipe reset status relies on the later gfx ring test result. 
+ */ + return 0; +} + static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; @@ -6618,8 +6815,13 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) return -EINVAL; r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); - if (r) - return r; + if (r) { + + dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); + r = gfx_v11_reset_gfx_pipe(ring); + if (r) + return r; + } r = gfx_v11_0_kgq_init_queue(ring, true); if (r) { @@ -6636,6 +6838,136 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) return amdgpu_ring_test_ring(ring); } +static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring) +{ + + struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; + int r; + + if (!gfx_v11_pipe_reset_support(adev)) + return -EOPNOTSUPP; + + gfx_v11_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); + clean_pipe = reset_pipe; + + if (adev->gfx.rs64_enable) { + + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 0); + break; + default: + break; + } + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe); + r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) - + RS64_FW_UC_START_ADDR_LO; + } else { + if (ring->me == 1) { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE3_RESET, 0); + break; + default: + break; + } + /* mec1 fw pc: CP_MEC1_INSTR_PNTR */ + } else { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + 
MEC_ME2_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE3_RESET, 0); + break; + default: + break; + } + /* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */ + } + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe); + r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR)); + } + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v11_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name, + r == 0 ? "successfully" : "failed"); + /*FIXME:Sometimes driver can't cache the MEC firmware start PC correctly, so the pipe + * reset status relies on the compute ring test result. + */ + return 0; +} + static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; @@ -6646,8 +6978,10 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); if (r) { - dev_err(adev->dev, "reset via MMIO failed %d\n", r); - return r; + dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r); + r = gfx_v11_0_reset_compute_pipe(ring); + if (r) + return r; } r = gfx_v11_0_kcq_init_queue(ring, true); @@ -6693,9 +7027,14 @@ static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printe for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); for (reg = 0; reg < reg_count; reg++) { - drm_printf(p, "%-50s \t 0x%08x\n", - gc_cp_reg_list_11[reg].reg_name, - adev->gfx.ip_dump_compute_queues[index + reg]); + if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP) + drm_printf(p, "%-50s \t 0x%08x\n", + "regCP_MEC_ME2_HEADER_DUMP", + adev->gfx.ip_dump_compute_queues[index + reg]); + else + drm_printf(p, "%-50s \t 0x%08x\n", + gc_cp_reg_list_11[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); } index += reg_count; } @@ -6755,9 +7094,16 @@ static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block) /* ME0 is for GFX so start from 1 for CP */ soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); for (reg = 0; reg < reg_count; reg++) { - adev->gfx.ip_dump_compute_queues[index + reg] = - RREG32(SOC15_REG_ENTRY_OFFSET( - gc_cp_reg_list_11[reg])); + if (i && + gc_cp_reg_list_11[reg].reg_offset == + regCP_MEC_ME1_HEADER_DUMP) + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_OFFSET(GC, 0, + regCP_MEC_ME2_HEADER_DUMP)); + else + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_11[reg])); } index += reg_count; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 62a257a4a3e9..f09d96bfee16 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -44,6 +44,8 @@ #include "gfx_v12_0.h" #include "nbif_v6_3_1.h" #include "mes_v12_0.h" +#include "mes_userqueue.h" +#include "amdgpu_userq_fence.h" #define GFX12_NUM_GFX_RINGS 1 #define GFX12_MEC_HPD_SIZE 2048 @@ -133,11 +135,14 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR0), SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR1), SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_RS64_INSTR_PNTR), - /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, 
regCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), @@ -186,7 +191,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_12[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET), - SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS) + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), }; static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { @@ -215,7 +229,24 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), }; static const struct soc15_reg_golden golden_settings_gc_12_0_rev0[] = { @@ -475,33 +506,18 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t padding, offset; - - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - padding = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding); - *cpu_ptr = cpu_to_le32(0xCAFEDEAD); - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) - return r; + r = 
amdgpu_device_wb_get(adev, &index); + if (r) + return r; - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); - cpu_ptr = &adev->wb.wb[index]; + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + cpu_ptr = &adev->wb.wb[index]; - r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r); - goto err1; - } + r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r); + goto err1; } ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); @@ -528,12 +544,10 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err2: - if (!ring->is_mes_queue) - amdgpu_ib_free(&ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -881,6 +895,34 @@ static void gfx_v12_0_select_me_pipe_q(struct amdgpu_device *adev, soc24_grbm_select(adev, me, pipe, q, vm); } +/* all sizes are in bytes */ +#define MQD_SHADOW_BASE_SIZE 73728 +#define MQD_SHADOW_BASE_ALIGNMENT 256 +#define MQD_FWWORKAREA_SIZE 484 +#define MQD_FWWORKAREA_ALIGNMENT 256 + +static void gfx_v12_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev, + struct amdgpu_gfx_shadow_info *shadow_info) +{ + shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; + shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; + shadow_info->csa_size = MQD_FWWORKAREA_SIZE; + shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; +} + +static int gfx_v12_0_get_gfx_shadow_info(struct amdgpu_device *adev, + struct amdgpu_gfx_shadow_info *shadow_info, + bool skip_check) +{ + if (adev->gfx.cp_gfx_shadow || skip_check) { + gfx_v12_0_get_gfx_shadow_info_nocheck(adev, shadow_info); + return 0; + } + + memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info)); + return -EINVAL; +} + static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = { .get_gpu_clock_counter = &gfx_v12_0_get_gpu_clock_counter, .select_se_sh = &gfx_v12_0_select_se_sh, @@ -889,6 +931,7 @@ static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = { .read_wave_vgprs = &gfx_v12_0_read_wave_vgprs, .select_me_pipe_q = &gfx_v12_0_select_me_pipe_q, .update_perfmon_mgcg = &gfx_v12_0_update_perf_clk, + .get_gfx_shadow_info = &gfx_v12_0_get_gfx_shadow_info, }; static int gfx_v12_0_gpu_early_init(struct amdgpu_device *adev) @@ -1346,6 +1389,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) unsigned num_compute_rings; int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); @@ -1354,7 +1398,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(12, 0, 1): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 8; adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 2; adev->gfx.mec.num_queue_per_pipe = 4; @@ -1372,6 +1416,22 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): + if (!adev->gfx.disable_uq && + adev->gfx.me_fw_version >= 2780 && + adev->gfx.pfp_fw_version >= 2840 && + adev->gfx.mec_fw_version >= 3050 && + 
adev->mes.fw_version[0] >= 123) { + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + } + break; + default: + break; + } + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): if (adev->gfx.me_fw_version >= 2480 && adev->gfx.pfp_fw_version >= 2530 && adev->gfx.mec_fw_version >= 2680 && @@ -1383,11 +1443,13 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) break; } - /* recalculate compute rings to use based on hardware configuration */ - num_compute_rings = (adev->gfx.mec.num_pipe_per_mec * - adev->gfx.mec.num_queue_per_pipe) / 2; - adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings, - num_compute_rings); + if (adev->gfx.num_compute_rings) { + /* recalculate compute rings to use based on hardware configuration */ + num_compute_rings = (adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe) / 2; + adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings, + num_compute_rings); + } /* EOP Event */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, @@ -1433,37 +1495,41 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } - /* set up the gfx ring */ - for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { - if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) - continue; - - r = gfx_v12_0_gfx_ring_init(adev, ring_id, - i, k, j); - if (r) - return r; - ring_id++; + if (adev->gfx.num_gfx_rings) { + /* set up the gfx ring */ + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { + if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v12_0_gfx_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; + ring_id++; + } } } } - ring_id = 0; - /* set up the compute queues - allocate horizontally across pipes */ - for (i = 0; i < adev->gfx.mec.num_mec; ++i) { - for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - if (!amdgpu_gfx_is_mec_queue_enabled(adev, - 0, i, k, j)) - continue; + if (adev->gfx.num_compute_rings) { + ring_id = 0; + /* set up the compute queues - allocate horizontally across pipes */ + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, + 0, i, k, j)) + continue; - r = gfx_v12_0_compute_ring_init(adev, ring_id, - i, k, j); - if (r) - return r; + r = gfx_v12_0_compute_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; - ring_id++; + ring_id++; + } } } } @@ -2948,6 +3014,8 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); #endif + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ @@ -2968,6 +3036,14 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, /* active the queue */ mqd->cp_gfx_hqd_active = 1; + /* set gfx UQ items */ + mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr); + mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); + mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); + mqd->fw_work_area_base_hi = 
upper_32_bits(prop->csa_addr); + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); + return 0; } @@ -3091,6 +3167,8 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); mqd->cp_hqd_pq_control = tmp; /* set the wb address whether it's enabled or not */ @@ -3142,6 +3220,10 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_active = prop->hqd_active; + /* set UQ fenceaddress */ + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); + return 0; } @@ -3600,6 +3682,49 @@ static int gfx_v12_0_hw_init(struct amdgpu_ip_block *ip_block) return r; } +static int gfx_v12_0_set_userq_eop_interrupts(struct amdgpu_device *adev, + bool enable) +{ + unsigned int irq_type; + int m, p, r; + + if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) { + for (m = 0; m < adev->gfx.me.num_me; m++) { + for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) { + irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; + if (enable) + r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + } + + if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) { + for (m = 0; m < adev->gfx.mec.num_mec; ++m) { + for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) { + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + (m * adev->gfx.mec.num_pipe_per_mec) + + p; + if (enable) + r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + } + + return 0; +} + static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -3610,6 +3735,7 @@ static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); + gfx_v12_0_set_userq_eop_interrupts(adev, false); if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { @@ -3698,11 +3824,33 @@ static int gfx_v12_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = true; + break; + case 1: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = false; + break; + case 2: + adev->gfx.disable_kq = true; + adev->gfx.disable_uq = false; + break; + } + adev->gfx.funcs = &gfx_v12_0_gfx_funcs; - adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), - AMDGPU_MAX_COMPUTE_RINGS); + if (adev->gfx.disable_kq) { + adev->gfx.num_gfx_rings = 0; + adev->gfx.num_compute_rings = 0; + } else { + adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); + } gfx_v12_0_set_kiq_pm4_funcs(adev); gfx_v12_0_set_ring_funcs(adev); @@ -3733,6 +3881,10 @@ static int gfx_v12_0_late_init(struct amdgpu_ip_block *ip_block) if (r) return r; + r = gfx_v12_0_set_userq_eop_interrupts(adev, true); + if (r) + return r; + return 
0; } @@ -4172,45 +4324,17 @@ static u64 gfx_v12_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v12_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always being used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - if (ring->use_doorbell) { - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - WREG32_SOC15(GC, 0, regCP_RB0_WPTR, - lower_32_bits(ring->wptr)); - WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, - upper_32_bits(ring->wptr)); - } + WREG32_SOC15(GC, 0, regCP_RB0_WPTR, + lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, + upper_32_bits(ring->wptr)); } } @@ -4235,42 +4359,14 @@ static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring) static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - /* XXX check if swapping is necessary on BE */ - if (ring->use_doorbell) { - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - BUG(); /* only DOORBELL method supported on gfx12 now */ - } + BUG(); /* only DOORBELL method supported on gfx12 now */ } } @@ -4317,10 +4413,6 @@ static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 
0x400000; - amdgpu_ring_write(ring, header); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -4340,10 +4432,6 @@ static void gfx_v12_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x40000000; - amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -4383,8 +4471,7 @@ static void gfx_v12_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); amdgpu_ring_write(ring, upper_32_bits(seq)); - amdgpu_ring_write(ring, ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); + amdgpu_ring_write(ring, 0); } static void gfx_v12_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -4412,10 +4499,7 @@ static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v12_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - if (ring->is_mes_queue) - gfx_v12_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); - else - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* compute doesn't have PFP */ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { @@ -4749,25 +4833,23 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - int i; + u32 doorbell_offset = entry->src_data[0]; u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring; - uint32_t mes_queue_id = entry->src_data[0]; + int i; DRM_DEBUG("IH: CP EOP\n"); - if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { - struct amdgpu_mes_queue *queue; + if (adev->enable_mes && doorbell_offset) { + struct amdgpu_userq_fence_driver *fence_drv = NULL; + struct xarray *xa = &adev->userq_xa; + unsigned long flags; - mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; - - spin_lock(&adev->mes.queue_id_lock); - queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); - if (queue) { - DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); - amdgpu_fence_process(queue->ring); - } - spin_unlock(&adev->mes.queue_id_lock); + xa_lock_irqsave(xa, flags); + fence_drv = xa_load(xa, doorbell_offset); + if (fence_drv) + amdgpu_userq_fence_driver_process(fence_drv); + xa_unlock_irqrestore(xa, flags); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; @@ -4934,27 +5016,29 @@ static void gfx_v12_0_handle_priv_fault(struct amdgpu_device *adev, pipe_id = (entry->ring_id & 0x03) >> 0; queue_id = (entry->ring_id & 0x70) >> 4; - switch (me_id) { - case 0: - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - if (ring->me == me_id && ring->pipe == pipe_id && - ring->queue == queue_id) - drm_sched_fault(&ring->sched); - } - break; - case 1: - case 2: - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - if (ring->me == me_id && ring->pipe == pipe_id && - ring->queue == queue_id) - drm_sched_fault(&ring->sched); + if (!adev->gfx.disable_kq) { + switch (me_id) { + case 0: + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + case 1: + case 2: + for (i = 0; i < 
adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + default: + BUG(); + break; } - break; - default: - BUG(); - break; } } @@ -5160,6 +5244,69 @@ static void gfx_v12_ip_dump(struct amdgpu_ip_block *ip_block) amdgpu_gfx_off_ctrl(adev, true); } +static bool gfx_v12_pipe_reset_support(struct amdgpu_device *adev) +{ + /* Disable the pipe reset until the CPFW fully support it.*/ + dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n"); + return false; +} + +static int gfx_v12_reset_gfx_pipe(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; + int r; + + if (!gfx_v12_pipe_reset_support(adev)) + return -EOPNOTSUPP; + + gfx_v12_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 0); + break; + default: + break; + } + + WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); + + r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) - + RS64_FW_UC_START_ADDR_LO; + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v12_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe reset: %s\n", ring->name, + r == 0 ? "successfully" : "failed"); + /* Sometimes the ME start pc counter can't cache correctly, so the + * PC check only as a reference and pipe reset result rely on the + * later ring test. 
+ */ + return 0; +} + static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; @@ -5170,8 +5317,10 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); if (r) { - dev_err(adev->dev, "reset via MES failed %d\n", r); - return r; + dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); + r = gfx_v12_reset_gfx_pipe(ring); + if (r) + return r; } r = gfx_v12_0_kgq_init_queue(ring, true); @@ -5189,6 +5338,89 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) return amdgpu_ring_test_ring(ring); } +static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; + int r = 0; + + if (!gfx_v12_pipe_reset_support(adev)) + return -EOPNOTSUPP; + + gfx_v12_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); + clean_pipe = reset_pipe; + + if (adev->gfx.rs64_enable) { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 0); + break; + default: + break; + } + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe); + r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) - + RS64_FW_UC_START_ADDR_LO; + } else { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 0); + break; + default: + break; + } + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe); + /* Doesn't find the F32 MEC instruction pointer register, and suppose + * the driver won't run into the F32 mode. + */ + } + + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v12_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe resets: %s\n", ring->name, + r == 0 ? 
"successfully" : "failed"); + /* Need the ring test to verify the pipe reset result.*/ + return 0; +} + static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; @@ -5199,8 +5431,10 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); if (r) { - dev_err(adev->dev, "reset via MMIO failed %d\n", r); - return r; + dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r); + r = gfx_v12_0_reset_compute_pipe(ring); + if (r) + return r; } r = gfx_v12_0_kcq_init_queue(ring, true); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 13fbee46417a..70d7a1f434c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -53,6 +53,9 @@ #define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002 #define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001 +#define GFX6_NUM_GFX_RINGS 1 +#define GFX6_NUM_COMPUTE_RINGS 2 + static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev); @@ -1732,10 +1735,14 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev) gfx_v6_0_get_cu_info(adev); gfx_v6_0_config_init(adev); - WREG32(mmCP_QUEUE_THRESHOLDS, ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) | - (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT))); - WREG32(mmCP_MEQ_THRESHOLDS, (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) | - (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT)); + WREG32(mmCP_QUEUE_THRESHOLDS, + ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) | + (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT))); + + /* set HW defaults for 3D engine */ + WREG32(mmCP_MEQ_THRESHOLDS, + (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) | + (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT)); sx_debug_1 = RREG32(mmSX_DEBUG_1); WREG32(mmSX_DEBUG_1, sx_debug_1); @@ -2851,44 +2858,21 @@ static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev) static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; if (adev->gfx.rlc.cs_data == NULL) return; if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] 
= cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + amdgpu_gfx_csb_preamble_end(buffer, count); } static void gfx_v6_0_init_pg(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 8181bd0e4f18..da0534ff1271 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -55,6 +55,9 @@ #define GFX7_NUM_GFX_RINGS 1 #define GFX7_MEC_HPD_SIZE 2048 +#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001 +#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003 + static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev); @@ -3882,67 +3885,22 @@ static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev) static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; if (adev->gfx.rlc.cs_data == NULL) return; if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); - switch (adev->asic_type) { - case CHIP_BONAIRE: - buffer[count++] = cpu_to_le32(0x16000012); - buffer[count++] = cpu_to_le32(0x00000000); - break; - case CHIP_KAVERI: - buffer[count++] = cpu_to_le32(0x00000000); /* XXX */ - buffer[count++] = cpu_to_le32(0x00000000); - break; - case CHIP_KABINI: - case CHIP_MULLINS: - buffer[count++] = cpu_to_le32(0x00000000); /* XXX */ - buffer[count++] = cpu_to_le32(0x00000000); - break; - case CHIP_HAWAII: - buffer[count++] = cpu_to_le32(0x3a00161a); - buffer[count++] = cpu_to_le32(0x0000002e); - break; - default: - buffer[count++] = cpu_to_le32(0x00000000); - buffer[count++] = cpu_to_le32(0x00000000); - break; - } - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); + buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config); + buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + amdgpu_gfx_csb_preamble_end(buffer, count); } static void gfx_v7_0_init_pg(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index bfedd487efc5..5ee2237d8ee8 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1223,48 +1223,22 @@ out: static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; if (adev->gfx.rlc.cs_data == NULL) return; if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - - PACKET3_SET_CONTEXT_REG_START); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); - buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - - PACKET3_SET_CONTEXT_REG_START); + buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config); buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + amdgpu_gfx_csb_preamble_end(buffer, count); } static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d7db4cb907ae..d377a7c57d5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -225,17 +225,36 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = { SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE), SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT), SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6), - /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0), SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1), SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2), - SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3) + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3), + /* packet headers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + 
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP) }; static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = { @@ -277,6 +296,14 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = { SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO), SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI), SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP) }; enum ta_ras_gfx_subblock { @@ -1624,42 +1651,16 @@ static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; if (adev->gfx.rlc.cs_data == NULL) return; if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - - PACKET3_SET_CONTEXT_REG_START); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); + amdgpu_gfx_csb_preamble_end(buffer, count); } static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) @@ -5441,16 +5442,8 @@ static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring, payload_size = sizeof(struct v9_ce_ib_state); - if (ring->is_mes_queue) { - payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); - } else { - payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); - 
ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; - } + payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size); @@ -5473,16 +5466,8 @@ static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring, payload_size = sizeof(struct v9_de_ib_state); - if (ring->is_mes_queue) { - payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); - } else { - payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; - } + payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status = IB_COMPLETION_STATUS_PREEMPTED; @@ -5672,19 +5657,9 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) cnt = (sizeof(ce_payload) >> 2) + 4 - 2; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ce_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - } else { - offset = offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - } + offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | @@ -5770,28 +5745,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bo void *de_payload_cpu_addr; int cnt; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gds_backup) + - offsetof(struct v9_gfx_meta_data, de_payload); - gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - offset = offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; + offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + - AMDGPU_CSA_SIZE - adev->gds.gds_size, - PAGE_SIZE); - } + gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); if (usegds) { de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); @@ -7339,9 +7299,14 @@ static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); for (reg = 0; reg < 
reg_count; reg++) { - drm_printf(p, "%-50s \t 0x%08x\n", - gc_cp_reg_list_9[reg].reg_name, - adev->gfx.ip_dump_compute_queues[index + reg]); + if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) + drm_printf(p, "%-50s \t 0x%08x\n", + "mmCP_MEC_ME2_HEADER_DUMP", + adev->gfx.ip_dump_compute_queues[index + reg]); + else + drm_printf(p, "%-50s \t 0x%08x\n", + gc_cp_reg_list_9[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); } index += reg_count; } @@ -7378,9 +7343,13 @@ static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block) soc15_grbm_select(adev, 1 + i, j, k, 0, 0); for (reg = 0; reg < reg_count; reg++) { - adev->gfx.ip_dump_compute_queues[index + reg] = - RREG32(SOC15_REG_ENTRY_OFFSET( - gc_cp_reg_list_9[reg])); + if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP)); + else + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_9[reg])); } index += reg_count; } @@ -7394,8 +7363,14 @@ static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block) static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + /* Emit the cleaner shader */ - amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + else + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER_9_0, 0)); + amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index d81449f9d822..c48cd47b531f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -1547,7 +1547,7 @@ static void gfx_v9_4_2_log_utc_edc_count(struct amdgpu_device *adev, { uint32_t bank, way, mem; static const char * const vml2_way_str[] = { "BIGK", "4K" }; - static const char * const utcl2_rounter_str[] = { "VMC", "APT" }; + static const char * const utcl2_router_str[] = { "VMC", "APT" }; mem = instance % blk->num_mem_blocks; way = (instance / blk->num_mem_blocks) % blk->num_ways; @@ -1568,7 +1568,7 @@ static void gfx_v9_4_2_log_utc_edc_count(struct amdgpu_device *adev, dev_info( adev->dev, "GFX SubBlock UTCL2_ROUTER_IFIF%d_GROUP0_%s, SED %d, DED %d\n", - bank, utcl2_rounter_str[mem], sec_cnt, ded_cnt); + bank, utcl2_router_str[mem], sec_cnt, ded_cnt); break; case ATC_L2_CACHE_2M: dev_info( diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 53fbf6ca7cdb..9db2bde5c59d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -105,9 +105,6 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_SAFE_MODE), SOC15_REG_ENTRY_STR(GC, 0, regRLC_INT_STAT), SOC15_REG_ENTRY_STR(GC, 0, regRLC_GPM_GENERAL_6), - /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME2_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), @@ -154,6 +151,14 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI), 
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GFX_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), }; struct amdgpu_gfx_ras gfx_v9_4_3_ras; @@ -4558,12 +4563,21 @@ static void gfx_v9_4_3_ip_print(struct amdgpu_ip_block *ip_block, struct drm_pri "\nxcc:%d mec:%d, pipe:%d, queue:%d\n", xcc_id, i, j, k); for (reg = 0; reg < reg_count; reg++) { - drm_printf(p, - "%-50s \t 0x%08x\n", - gc_cp_reg_list_9_4_3[reg].reg_name, - adev->gfx.ip_dump_compute_queues - [xcc_offset + inst_offset + - reg]); + if (i && gc_cp_reg_list_9_4_3[reg].reg_offset == + regCP_MEC_ME1_HEADER_DUMP) + drm_printf(p, + "%-50s \t 0x%08x\n", + "regCP_MEC_ME2_HEADER_DUMP", + adev->gfx.ip_dump_compute_queues + [xcc_offset + inst_offset + + reg]); + else + drm_printf(p, + "%-50s \t 0x%08x\n", + gc_cp_reg_list_9_4_3[reg].reg_name, + adev->gfx.ip_dump_compute_queues + [xcc_offset + inst_offset + + reg]); } inst_offset += reg_count; } @@ -4612,12 +4626,20 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block) GET_INST(GC, xcc_id)); for (reg = 0; reg < reg_count; reg++) { - adev->gfx.ip_dump_compute_queues - [xcc_offset + - inst_offset + reg] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST( - gc_cp_reg_list_9_4_3[reg], - GET_INST(GC, xcc_id))); + if (i && gc_cp_reg_list_9_4_3[reg].reg_offset == + regCP_MEC_ME1_HEADER_DUMP) + adev->gfx.ip_dump_compute_queues + [xcc_offset + + inst_offset + reg] = + RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), + regCP_MEC_ME2_HEADER_DUMP)); + else + adev->gfx.ip_dump_compute_queues + [xcc_offset + + inst_offset + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST( + gc_cp_reg_list_9_4_3[reg], + GET_INST(GC, xcc_id))); } inst_offset += reg_count; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 809b3a882d0d..a3e2787501f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -428,10 +428,6 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int struct amdgpu_device *adev = ring->adev; uint32_t reg; - /* MES fw manages IH_VMID_x_LUT updating */ - if (ring->is_mes_queue) - return; - if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index e74e26b6a4f2..917d894a1316 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -393,10 +393,6 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int struct amdgpu_device *adev = ring->adev; uint32_t reg; - /* MES fw manages IH_VMID_x_LUT updating */ - if (ring->is_mes_queue) - return; - if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; else @@ -832,7 +828,7 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.first_kfd_vmid = 8; + adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 
1 : 8; amdgpu_vm_manager_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index c6f290704d47..b645d3e6a6c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -413,10 +413,6 @@ static void gmc_v12_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid struct amdgpu_device *adev = ring->adev; uint32_t reg; - /* MES fw manages IH_VMID_x_LUT updating */ - if (ring->is_mes_queue) - return; - if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; else @@ -820,7 +816,7 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.first_kfd_vmid = 8; + adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8; amdgpu_vm_manager_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index a992e79d9581..8030fcd64210 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -249,7 +249,7 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) /* disable VGA render */ tmp = RREG32(mmVGA_RENDER_CONTROL); - tmp &= ~VGA_VSTATUS_CNTL; + tmp &= VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK; WREG32(mmVGA_RENDER_CONTROL, tmp); } /* Update configuration */ @@ -627,17 +627,16 @@ static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev, "write" : "read", block, mc_client, mc_id); } -/* static const u32 mc_cg_registers[] = { - MC_HUB_MISC_HUB_CG, - MC_HUB_MISC_SIP_CG, - MC_HUB_MISC_VM_CG, - MC_XPB_CLK_GAT, - ATC_MISC_CG, - MC_CITF_MISC_WR_CG, - MC_CITF_MISC_RD_CG, - MC_CITF_MISC_VM_CG, - VM_L2_CG, + mmMC_HUB_MISC_HUB_CG, + mmMC_HUB_MISC_SIP_CG, + mmMC_HUB_MISC_VM_CG, + mmMC_XPB_CLK_GAT, + mmATC_MISC_CG, + mmMC_CITF_MISC_WR_CG, + mmMC_CITF_MISC_RD_CG, + mmMC_CITF_MISC_VM_CG, + mmVM_L2_CG, }; static const u32 mc_cg_ls_en[] = { @@ -672,7 +671,7 @@ static void gmc_v6_0_enable_mc_ls(struct amdgpu_device *adev, for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { orig = data = RREG32(mc_cg_registers[i]); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_LS)) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) data |= mc_cg_ls_en[i]; else data &= ~mc_cg_ls_en[i]; @@ -689,7 +688,7 @@ static void gmc_v6_0_enable_mc_mgcg(struct amdgpu_device *adev, for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { orig = data = RREG32(mc_cg_registers[i]); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_MGCG)) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) data |= mc_cg_en[i]; else data &= ~mc_cg_en[i]; @@ -705,7 +704,7 @@ static void gmc_v6_0_enable_bif_mgls(struct amdgpu_device *adev, orig = data = RREG32_PCIE(ixPCIE_CNTL2); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_BIF_LS)) { + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) { data = REG_SET_FIELD(data, PCIE_CNTL2, SLV_MEM_LS_EN, 1); data = REG_SET_FIELD(data, PCIE_CNTL2, MST_MEM_LS_EN, 1); data = REG_SET_FIELD(data, PCIE_CNTL2, REPLAY_MEM_LS_EN, 1); @@ -728,7 +727,7 @@ static void gmc_v6_0_enable_hdp_mgcg(struct amdgpu_device *adev, orig = data = RREG32(mmHDP_HOST_PATH_CNTL); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_MGCG)) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG)) data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 0); else data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 1); @@ -744,7 +743,7 @@ static void 
gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev, orig = data = RREG32(mmHDP_MEM_POWER_LS); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_LS)) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 1); else data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 0); @@ -752,7 +751,6 @@ static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev, if (orig != data) WREG32(mmHDP_MEM_POWER_LS, data); } -*/ static int gmc_v6_0_convert_vram_type(int mc_seq_vram_type) { @@ -1098,6 +1096,20 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev, static int gmc_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { + struct amdgpu_device *adev = ip_block->adev; + bool gate = false; + + if (state == AMD_CG_STATE_GATE) + gate = true; + + if (!(adev->flags & AMD_IS_APU)) { + gmc_v6_0_enable_mc_mgcg(adev, gate); + gmc_v6_0_enable_mc_ls(adev, gate); + } + gmc_v6_0_enable_bif_mgls(adev, gate); + gmc_v6_0_enable_hdp_mgcg(adev, gate); + gmc_v6_0_enable_hdp_ls(adev, gate); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 83e39f16044a..a8d5795084fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1157,17 +1157,10 @@ static bool gmc_v7_0_is_idle(struct amdgpu_ip_block *ip_block) static int gmc_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned int i; - u32 tmp; struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - /* read MC_STATUS */ - tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__MCB_BUSY_MASK | - SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK | - SRBM_STATUS__MCC_BUSY_MASK | - SRBM_STATUS__MCD_BUSY_MASK | - SRBM_STATUS__VMC_BUSY_MASK); - if (!tmp) + if (gmc_v7_0_is_idle(ip_block)) return 0; udelay(1); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 5effe8327d29..53050176c244 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1213,10 +1213,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, if (uncached) { mtype = MTYPE_UC; } else if (ext_coherent) { - if (gc_ip_version == IP_VERSION(9, 5, 0) || adev->rev_id) - mtype = is_local ? MTYPE_CC : MTYPE_UC; - else - mtype = MTYPE_UC; + mtype = is_local ? MTYPE_CC : MTYPE_UC; } else if (adev->flags & AMD_IS_APU) { mtype = is_local ? mtype_local : MTYPE_NC; } else { @@ -1336,7 +1333,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, mtype_local = MTYPE_CC; *flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype_local); - } else if (adev->rev_id) { + } else { /* MTYPE_UC case */ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC); } @@ -2411,13 +2408,6 @@ static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block) adev->gmc.flush_tlb_needs_extra_type_2 = amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) && adev->gmc.xgmi.num_physical_nodes; - /* - * TODO: This workaround is badly documented and had a buggy - * implementation. We should probably verify what we do here. 
- */ - adev->gmc.flush_tlb_needs_extra_type_0 = - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) && - adev->rev_id == 0; /* The sequence of these two function calls matters.*/ gmc_v9_0_init_golden_registers(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index f1dc13b3ab38..e6c0d86d3486 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -36,17 +36,6 @@ #define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK 0x00020000L #define mmHDP_MEM_POWER_CTRL_BASE_IDX 0 -static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) { - WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); - } else { - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - } -} - static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -180,7 +169,7 @@ struct amdgpu_hdp_ras hdp_v4_0_ras = { }; const struct amdgpu_hdp_funcs hdp_v4_0_funcs = { - .flush_hdp = hdp_v4_0_flush_hdp, + .flush_hdp = amdgpu_hdp_generic_flush, .invalidate_hdp = hdp_v4_0_invalidate_hdp, .update_clock_gating = hdp_v4_0_update_clock_gating, .get_clock_gating_state = hdp_v4_0_get_clockgating_state, diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index 43195c079748..8bc001dc9f63 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -27,17 +27,6 @@ #include "hdp/hdp_5_0_0_sh_mask.h" #include <uapi/linux/kfd_ioctl.h> -static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) { - WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); - } else { - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - } -} - static void hdp_v5_0_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -217,7 +206,7 @@ static void hdp_v5_0_init_registers(struct amdgpu_device *adev) } const struct amdgpu_hdp_funcs hdp_v5_0_funcs = { - .flush_hdp = hdp_v5_0_flush_hdp, + .flush_hdp = amdgpu_hdp_generic_flush, .invalidate_hdp = hdp_v5_0_invalidate_hdp, .update_clock_gating = hdp_v5_0_update_clock_gating, .get_clock_gating_state = hdp_v5_0_get_clockgating_state, diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c index fcb8dd2876bc..40940b4ab400 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c @@ -33,7 +33,17 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev, if (!ring || !ring->funcs->emit_wreg) { WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + if (amdgpu_sriov_vf(adev)) { + /* this is fine because SR_IOV doesn't remap the register */ + RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + } else { + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. 
+ */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); + } } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c index a88d25a06c29..ec20daf4272c 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c @@ -30,17 +30,6 @@ #define regHDP_CLK_CNTL_V6_1 0xd5 #define regHDP_CLK_CNTL_V6_1_BASE_IDX 0 -static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) { - WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); - } else { - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - } -} - static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev, bool enable) { @@ -149,7 +138,7 @@ static void hdp_v6_0_get_clockgating_state(struct amdgpu_device *adev, } const struct amdgpu_hdp_funcs hdp_v6_0_funcs = { - .flush_hdp = hdp_v6_0_flush_hdp, + .flush_hdp = amdgpu_hdp_generic_flush, .update_clock_gating = hdp_v6_0_update_clock_gating, .get_clock_gating_state = hdp_v6_0_get_clockgating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c index 49f7eb4fbd11..ed1debc03507 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -27,17 +27,6 @@ #include "hdp/hdp_7_0_0_sh_mask.h" #include <uapi/linux/kfd_ioctl.h> -static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) { - WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); - } else { - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - } -} - static void hdp_v7_0_update_clock_gating(struct amdgpu_device *adev, bool enable) { @@ -137,7 +126,7 @@ static void hdp_v7_0_get_clockgating_state(struct amdgpu_device *adev, } const struct amdgpu_hdp_funcs hdp_v7_0_funcs = { - .flush_hdp = hdp_v7_0_flush_hdp, + .flush_hdp = amdgpu_hdp_generic_flush, .update_clock_gating = hdp_v7_0_update_clock_gating, .get_clock_gating_state = hdp_v7_0_get_clockgating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index eb4185dcbd1d..5900b560b7de 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -349,6 +349,7 @@ static int ih_v6_0_irq_init(struct amdgpu_device *adev) if (ret) return ret; } + ih[i]->overflow = false; } /* update doorbell range for ih ring 0 */ @@ -446,7 +447,10 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev, wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr); if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) goto out; - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + if (!amdgpu_sriov_vf(adev)) + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + else + ih->overflow = true; /* When a ring buffer overflow happen start parsing interrupt * from the last not overwritten vector (wptr + 32). 
Hopefully diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index 218e16b68f1d..6f73033d78b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -156,7 +156,7 @@ static int jpeg_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { ring = &adev->jpeg.inst[i].ring_dec[j]; - ring->use_doorbell = false; + ring->use_doorbell = true; ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id); if (!amdgpu_sriov_vf(adev)) { ring->doorbell_index = @@ -264,7 +264,7 @@ static int jpeg_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) ring = &adev->jpeg.inst[i].ring_dec[j]; if (ring->use_doorbell) WREG32_SOC15_OFFSET(VCN, GET_INST(VCN, i), regVCN_JPEG_DB_CTRL, - (ring->pipe ? (ring->pipe - 0x15) : 0), + ring->pipe, ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | VCN_JPEG_DB_CTRL__EN_MASK); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c new file mode 100644 index 000000000000..d6f50b13e2ba --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -0,0 +1,355 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_gfx.h" +#include "mes_userqueue.h" +#include "amdgpu_userq_fence.h" + +#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE +#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE + +static int +mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo) +{ + int ret; + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + DRM_ERROR("Failed to reserve bo. ret %d\n", ret); + goto err_reserve_bo_failed; + } + + ret = amdgpu_ttm_alloc_gart(&bo->tbo); + if (ret) { + DRM_ERROR("Failed to bind bo to GART. 
ret %d\n", ret); + goto err_map_bo_gart_failed; + } + + amdgpu_bo_unreserve(bo); + bo = amdgpu_bo_ref(bo); + + return 0; + +err_map_bo_gart_failed: + amdgpu_bo_unreserve(bo); +err_reserve_bo_failed: + return ret; +} + +static int +mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + uint64_t wptr) +{ + struct amdgpu_bo_va_mapping *wptr_mapping; + struct amdgpu_vm *wptr_vm; + struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj; + int ret; + + wptr_vm = queue->vm; + ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); + if (ret) + return ret; + + wptr &= AMDGPU_GMC_HOLE_MASK; + wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT); + amdgpu_bo_unreserve(wptr_vm->root.bo); + if (!wptr_mapping) { + DRM_ERROR("Failed to lookup wptr bo\n"); + return -EINVAL; + } + + wptr_obj->obj = wptr_mapping->bo_va->base.bo; + if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { + DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n"); + return -EINVAL; + } + + ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj); + if (ret) { + DRM_ERROR("Failed to map wptr bo to GART\n"); + return ret; + } + + queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj); + return 0; +} + +static int convert_to_mes_priority(int priority) +{ + switch (priority) { + case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW: + default: + return AMDGPU_MES_PRIORITY_LEVEL_NORMAL; + case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW: + return AMDGPU_MES_PRIORITY_LEVEL_LOW; + case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH: + return AMDGPU_MES_PRIORITY_LEVEL_MEDIUM; + case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH: + return AMDGPU_MES_PRIORITY_LEVEL_HIGH; + } +} + +static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_userq_obj *ctx = &queue->fw_obj; + struct amdgpu_mqd_prop *userq_props = queue->userq_prop; + struct mes_add_queue_input queue_input; + int r; + + memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); + + queue_input.process_va_start = 0; + queue_input.process_va_end = adev->vm_manager.max_pfn - 1; + + /* set process quantum to 10 ms and gang quantum to 1 ms as default */ + queue_input.process_quantum = 100000; + queue_input.gang_quantum = 10000; + queue_input.paging = false; + + queue_input.process_context_addr = ctx->gpu_addr; + queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; + queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; + queue_input.gang_global_priority_level = convert_to_mes_priority(queue->priority); + + queue_input.process_id = queue->vm->pasid; + queue_input.queue_type = queue->queue_type; + queue_input.mqd_addr = queue->mqd.gpu_addr; + queue_input.wptr_addr = userq_props->wptr_gpu_addr; + queue_input.queue_size = userq_props->queue_size >> 2; + queue_input.doorbell_offset = userq_props->doorbell_index; + queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo); + queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr; + + amdgpu_mes_lock(&adev->mes); + r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); + if (r) { + DRM_ERROR("Failed to map queue in HW, err (%d)\n", r); + return r; + } + + DRM_DEBUG_DRIVER("Queue (doorbell:%d) mapped successfully\n", userq_props->doorbell_index); + return 0; +} + +static int mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue 
*queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct mes_remove_queue_input queue_input; + struct amdgpu_userq_obj *ctx = &queue->fw_obj; + int r; + + memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input)); + queue_input.doorbell_offset = queue->doorbell_index; + queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; + + amdgpu_mes_lock(&adev->mes); + r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); + if (r) + DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", r); + return r; +} + +static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + struct drm_amdgpu_userq_in *mqd_user) +{ + struct amdgpu_userq_obj *ctx = &queue->fw_obj; + int r, size; + + /* + * The FW expects at least one page space allocated for + * process ctx and gang ctx each. Create an object + * for the same. + */ + size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ; + r = amdgpu_userq_create_object(uq_mgr, ctx, size); + if (r) { + DRM_ERROR("Failed to allocate ctx space bo for userqueue, err:%d\n", r); + return r; + } + + return 0; +} + +static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, + struct drm_amdgpu_userq_in *args_in, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type]; + struct drm_amdgpu_userq_in *mqd_user = args_in; + struct amdgpu_mqd_prop *userq_props; + int r; + + /* Structure to initialize MQD for userqueue using generic MQD init function */ + userq_props = kzalloc(sizeof(struct amdgpu_mqd_prop), GFP_KERNEL); + if (!userq_props) { + DRM_ERROR("Failed to allocate memory for userq_props\n"); + return -ENOMEM; + } + + if (!mqd_user->wptr_va || !mqd_user->rptr_va || + !mqd_user->queue_va || mqd_user->queue_size == 0) { + DRM_ERROR("Invalid MQD parameters for userqueue\n"); + r = -EINVAL; + goto free_props; + } + + r = amdgpu_userq_create_object(uq_mgr, &queue->mqd, mqd_hw_default->mqd_size); + if (r) { + DRM_ERROR("Failed to create MQD object for userqueue\n"); + goto free_props; + } + + /* Initialize the MQD BO with user given values */ + userq_props->wptr_gpu_addr = mqd_user->wptr_va; + userq_props->rptr_gpu_addr = mqd_user->rptr_va; + userq_props->queue_size = mqd_user->queue_size; + userq_props->hqd_base_gpu_addr = mqd_user->queue_va; + userq_props->mqd_gpu_addr = queue->mqd.gpu_addr; + userq_props->use_doorbell = true; + userq_props->doorbell_index = queue->doorbell_index; + userq_props->fence_address = queue->fence_drv->gpu_addr; + + if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) { + struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd; + + if (mqd_user->mqd_size != sizeof(*compute_mqd)) { + DRM_ERROR("Invalid compute IP MQD size\n"); + r = -EINVAL; + goto free_mqd; + } + + compute_mqd = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(compute_mqd)) { + DRM_ERROR("Failed to read user MQD\n"); + r = -ENOMEM; + goto free_mqd; + } + + userq_props->eop_gpu_addr = compute_mqd->eop_va; + userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; + userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM; + userq_props->hqd_active = false; + userq_props->tmz_queue = + mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE; + kfree(compute_mqd); + } else if (queue->queue_type == AMDGPU_HW_IP_GFX) { + struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11; + + if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || 
!mqd_user->mqd) { + DRM_ERROR("Invalid GFX MQD\n"); + r = -EINVAL; + goto free_mqd; + } + + mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(mqd_gfx_v11)) { + DRM_ERROR("Failed to read user MQD\n"); + r = -ENOMEM; + goto free_mqd; + } + + userq_props->shadow_addr = mqd_gfx_v11->shadow_va; + userq_props->csa_addr = mqd_gfx_v11->csa_va; + userq_props->tmz_queue = + mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE; + kfree(mqd_gfx_v11); + } else if (queue->queue_type == AMDGPU_HW_IP_DMA) { + struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11; + + if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) { + DRM_ERROR("Invalid SDMA MQD\n"); + r = -EINVAL; + goto free_mqd; + } + + mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(mqd_sdma_v11)) { + DRM_ERROR("Failed to read sdma user MQD\n"); + r = -ENOMEM; + goto free_mqd; + } + + userq_props->csa_addr = mqd_sdma_v11->csa_va; + kfree(mqd_sdma_v11); + } + + queue->userq_prop = userq_props; + + r = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr, userq_props); + if (r) { + DRM_ERROR("Failed to initialize MQD for userqueue\n"); + goto free_mqd; + } + + /* Create BO for FW operations */ + r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user); + if (r) { + DRM_ERROR("Failed to allocate BO for userqueue (%d)", r); + goto free_mqd; + } + + /* FW expects WPTR BOs to be mapped into GART */ + r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr); + if (r) { + DRM_ERROR("Failed to create WPTR mapping\n"); + goto free_ctx; + } + + return 0; + +free_ctx: + amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj); + +free_mqd: + amdgpu_userq_destroy_object(uq_mgr, &queue->mqd); + +free_props: + kfree(userq_props); + + return r; +} + +static void +mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj); + kfree(queue->userq_prop); + amdgpu_userq_destroy_object(uq_mgr, &queue->mqd); +} + +const struct amdgpu_userq_funcs userq_mes_funcs = { + .mqd_create = mes_userq_mqd_create, + .mqd_destroy = mes_userq_mqd_destroy, + .unmap = mes_userq_unmap, + .map = mes_userq_map, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h new file mode 100644 index 000000000000..090ae8897770 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef MES_USERQ_H +#define MES_USERQ_H +#include "amdgpu_userq.h" + +extern const struct amdgpu_userq_funcs userq_mes_funcs; +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index ef9538fbbf53..c9eba537de09 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -287,6 +287,23 @@ static int convert_to_mes_queue_type(int queue_type) return -1; } +static int convert_to_mes_priority_level(int priority_level) +{ + switch (priority_level) { + case AMDGPU_MES_PRIORITY_LEVEL_LOW: + return AMD_PRIORITY_LEVEL_LOW; + case AMDGPU_MES_PRIORITY_LEVEL_NORMAL: + default: + return AMD_PRIORITY_LEVEL_NORMAL; + case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM: + return AMD_PRIORITY_LEVEL_MEDIUM; + case AMDGPU_MES_PRIORITY_LEVEL_HIGH: + return AMD_PRIORITY_LEVEL_HIGH; + case AMDGPU_MES_PRIORITY_LEVEL_REALTIME: + return AMD_PRIORITY_LEVEL_REALTIME; + } +} + static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, struct mes_add_queue_input *input) { @@ -310,9 +327,9 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gang_quantum = input->gang_quantum; mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; mes_add_queue_pkt.inprocess_gang_priority = - input->inprocess_gang_priority; + convert_to_mes_priority_level(input->inprocess_gang_priority); mes_add_queue_pkt.gang_global_priority_level = - input->gang_global_priority_level; + convert_to_mes_priority_level(input->gang_global_priority_level); mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; mes_add_queue_pkt.mqd_addr = input->mqd_addr; @@ -458,31 +475,6 @@ static int mes_v11_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_typ return r; } -static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes, - struct mes_reset_queue_input *input) -{ - if (input->use_mmio) - return mes_v11_0_reset_queue_mmio(mes, input->queue_type, - input->me_id, input->pipe_id, - input->queue_id, input->vmid); - - union MESAPI__RESET mes_reset_queue_pkt; - - memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); - - mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; - mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; - mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - - mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; - mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; - /*mes_reset_queue_pkt.reset_queue_only = 1;*/ - - return mes_v11_0_submit_pkt_and_poll_completion(mes, - &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), - offsetof(union MESAPI__REMOVE_QUEUE, api_status)); -} - static int mes_v11_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { @@ -649,7 +641,7 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, break; case MES_MISC_OP_CHANGE_CONFIG: if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x63) { - dev_err(mes->adev->dev, "MES FW versoin must be larger than 0x63 to support limit single process feature.\n"); + dev_err(mes->adev->dev, "MES FW version must be larger than 0x63 to support limit single process feature.\n"); return -EINVAL; } misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; @@ -694,7 +686,8 @@ static int 
mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes->compute_hqd_mask[i]; for (i = 0; i < MAX_GFX_PIPES; i++) - mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i]; + mes_set_hw_res_pkt.gfx_hqd_mask[i] = + mes->gfx_hqd_mask[i]; for (i = 0; i < MAX_SDMA_PIPES; i++) mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; @@ -723,7 +716,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes->event_log_gpu_addr; } - if (enforce_isolation) + if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) mes_set_hw_res_pkt.limit_single_process = 1; return mes_v11_0_submit_pkt_and_poll_completion(mes, @@ -753,8 +746,8 @@ static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes) offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } -static int mes_v11_0_reset_legacy_queue(struct amdgpu_mes *mes, - struct mes_reset_legacy_queue_input *input) +static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input) { union MESAPI__RESET mes_reset_queue_pkt; @@ -772,7 +765,7 @@ static int mes_v11_0_reset_legacy_queue(struct amdgpu_mes *mes, mes_reset_queue_pkt.queue_type = convert_to_mes_queue_type(input->queue_type); - if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { + if (input->legacy_gfx) { mes_reset_queue_pkt.reset_legacy_gfx = 1; mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; mes_reset_queue_pkt.queue_id_lp = input->queue_id; @@ -798,7 +791,6 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .suspend_gang = mes_v11_0_suspend_gang, .resume_gang = mes_v11_0_resume_gang, .misc_op = mes_v11_0_misc_op, - .reset_legacy_queue = mes_v11_0_reset_legacy_queue, .reset_hw_queue = mes_v11_0_reset_hw_queue, }; @@ -1701,22 +1693,10 @@ static int mes_v11_0_early_init(struct amdgpu_ip_block *ip_block) return 0; } -static int mes_v11_0_late_init(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - - /* it's only intended for use in mes_self_test case, not for s0ix and reset */ - if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend && - (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3))) - amdgpu_mes_self_test(adev); - - return 0; -} - static const struct amd_ip_funcs mes_v11_0_ip_funcs = { .name = "mes_v11_0", .early_init = mes_v11_0_early_init, - .late_init = mes_v11_0_late_init, + .late_init = NULL, .sw_init = mes_v11_0_sw_init, .sw_fini = mes_v11_0_sw_fini, .hw_init = mes_v11_0_hw_init, diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index e6ab617b9a40..b4f17332d466 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -274,6 +274,23 @@ static int convert_to_mes_queue_type(int queue_type) return -1; } +static int convert_to_mes_priority_level(int priority_level) +{ + switch (priority_level) { + case AMDGPU_MES_PRIORITY_LEVEL_LOW: + return AMD_PRIORITY_LEVEL_LOW; + case AMDGPU_MES_PRIORITY_LEVEL_NORMAL: + default: + return AMD_PRIORITY_LEVEL_NORMAL; + case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM: + return AMD_PRIORITY_LEVEL_MEDIUM; + case AMDGPU_MES_PRIORITY_LEVEL_HIGH: + return AMD_PRIORITY_LEVEL_HIGH; + case AMDGPU_MES_PRIORITY_LEVEL_REALTIME: + return AMD_PRIORITY_LEVEL_REALTIME; + } +} + static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes, struct mes_add_queue_input *input) { @@ -297,9 +314,9 @@ static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gang_quantum = input->gang_quantum; mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; 
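/*
 * Editor's sketch (illustrative, not part of the patch): from this hunk on,
 * both the v11 and v12 ADD_QUEUE paths run the driver-side
 * AMDGPU_MES_PRIORITY_LEVEL_* values through convert_to_mes_priority_level()
 * instead of copying them raw, so the driver enum and the firmware's
 * AMD_PRIORITY_LEVEL_* encoding are free to diverge. A minimal caller would
 * look roughly like this, using the packet and input types already used in
 * these files:
 */
static void example_fill_gang_priorities(const struct mes_add_queue_input *input,
					 union MESAPI__ADD_QUEUE *pkt)
{
	/* translate driver enum -> firmware ABI value before filling the packet */
	pkt->inprocess_gang_priority =
		convert_to_mes_priority_level(input->inprocess_gang_priority);
	pkt->gang_global_priority_level =
		convert_to_mes_priority_level(input->gang_global_priority_level);
}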
mes_add_queue_pkt.inprocess_gang_priority = - input->inprocess_gang_priority; + convert_to_mes_priority_level(input->inprocess_gang_priority); mes_add_queue_pkt.gang_global_priority_level = - input->gang_global_priority_level; + convert_to_mes_priority_level(input->gang_global_priority_level); mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; mes_add_queue_pkt.mqd_addr = input->mqd_addr; @@ -477,32 +494,6 @@ static int mes_v12_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_typ return r; } -static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes, - struct mes_reset_queue_input *input) -{ - union MESAPI__RESET mes_reset_queue_pkt; - int pipe; - - memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); - - mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; - mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; - mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - - mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; - mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; - /*mes_reset_queue_pkt.reset_queue_only = 1;*/ - - if (mes->adev->enable_uni_mes) - pipe = AMDGPU_MES_KIQ_PIPE; - else - pipe = AMDGPU_MES_SCHED_PIPE; - - return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, - &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), - offsetof(union MESAPI__REMOVE_QUEUE, api_status)); -} - static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { @@ -762,7 +753,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) pipe * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE); } - if (enforce_isolation) + if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) mes_set_hw_res_pkt.limit_single_process = 1; return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, @@ -845,8 +836,8 @@ static void mes_v12_0_enable_unmapped_doorbell_handling( WREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL, data); } -static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, - struct mes_reset_legacy_queue_input *input) +static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input) { union MESAPI__RESET mes_reset_queue_pkt; int pipe; @@ -865,7 +856,7 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, mes_reset_queue_pkt.queue_type = convert_to_mes_queue_type(input->queue_type); - if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { + if (input->legacy_gfx) { mes_reset_queue_pkt.reset_legacy_gfx = 1; mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; mes_reset_queue_pkt.queue_id_lp = input->queue_id; @@ -878,7 +869,7 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; } - if (mes->adev->enable_uni_mes) + if (input->is_kq) pipe = AMDGPU_MES_KIQ_PIPE; else pipe = AMDGPU_MES_SCHED_PIPE; @@ -896,7 +887,6 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = { .suspend_gang = mes_v12_0_suspend_gang, .resume_gang = mes_v12_0_resume_gang, .misc_op = mes_v12_0_misc_op, - .reset_legacy_queue = mes_v12_0_reset_legacy_queue, .reset_hw_queue = mes_v12_0_reset_hw_queue, }; @@ -1811,21 +1801,10 @@ static int mes_v12_0_early_init(struct amdgpu_ip_block *ip_block) return 0; } -static int mes_v12_0_late_init(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - - /* it's only intended for use in mes_self_test case, not for s0ix and reset */ - if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend) - 
amdgpu_mes_self_test(adev); - - return 0; -} - static const struct amd_ip_funcs mes_v12_0_ip_funcs = { .name = "mes_v12_0", .early_init = mes_v12_0_early_init, - .late_init = mes_v12_0_late_init, + .late_init = NULL, .sw_init = mes_v12_0_sw_init, .sw_fini = mes_v12_0_sw_fini, .hw_init = mes_v12_0_hw_init, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 84cde1239ee4..76167fadb292 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -30,6 +30,7 @@ #include "soc15_common.h" #include "soc15.h" #include "amdgpu_ras.h" +#include "amdgpu_psp.h" #define regVM_L2_CNTL3_DEFAULT 0x80100007 #define regVM_L2_CNTL4_DEFAULT 0x000000c1 @@ -192,10 +193,8 @@ static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev) uint32_t tmp, inst_mask; int i; - /* Setup TLB control */ - inst_mask = adev->aid_mask; - for_each_inst(i, inst_mask) { - tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL); + if (amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev)) { + tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); @@ -209,7 +208,26 @@ static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev) MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); - WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp); + psp_reg_program_no_ring(&adev->psp, tmp, PSP_REG_MMHUB_L1_TLB_CNTL); + } else { + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { + tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, + 1); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC);/* XXX for emulation. 
*/ + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); + + WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp); + } } } @@ -221,6 +239,9 @@ static void mmhub_v1_8_init_snoop_override_regs(struct amdgpu_device *adev) uint32_t distance = regDAGB1_WRCLI_GPU_SNOOP_OVERRIDE - regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE; + if (amdgpu_sriov_vf(adev)) + return; + inst_mask = adev->aid_mask; for_each_inst(i, inst_mask) { for (j = 0; j < 5; j++) { /* DAGB instances */ @@ -454,6 +475,30 @@ static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev) return 0; } +static void mmhub_v1_8_disable_l1_tlb(struct amdgpu_device *adev) +{ + u32 tmp; + u32 i, inst_mask; + + if (amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev)) { + tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + psp_reg_program_no_ring(&adev->psp, tmp, PSP_REG_MMHUB_L1_TLB_CNTL); + } else { + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { + tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, + 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp); + } + } +} + static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub; @@ -467,15 +512,6 @@ static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev) for (i = 0; i < 16; i++) WREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT0_CNTL, i * hub->ctx_distance, 0); - - /* Setup TLB control */ - tmp = RREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, - 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, - ENABLE_ADVANCED_DRIVER_MODEL, 0); - WREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL, tmp); - if (!amdgpu_sriov_vf(adev)) { /* Setup L2 cache */ tmp = RREG32_SOC15(MMHUB, j, regVM_L2_CNTL); @@ -485,6 +521,8 @@ static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, j, regVM_L2_CNTL3, 0); } } + + mmhub_v1_8_disable_l1_tlb(adev); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index f5411b798e11..48101a34e049 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -274,6 +274,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) { struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + struct amdgpu_reset_context reset_context = { 0 }; amdgpu_virt_fini_data_exchange(adev); @@ -281,8 +282,6 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) if (amdgpu_device_should_recover_gpu(adev) && (!amdgpu_device_has_job_running(adev) || adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) { - struct amdgpu_reset_context reset_context; - memset(&reset_context, 0, sizeof(reset_context)); reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; @@ -293,6 +292,19 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) } } +static void xgpu_ai_mailbox_bad_pages_work(struct work_struct *work) +{ + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, bad_pages_work); + struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + + if (down_read_trylock(&adev->reset_domain->sem)) { + 
amdgpu_virt_fini_data_exchange(adev); + amdgpu_virt_request_bad_pages(adev); + amdgpu_virt_init_data_exchange(adev); + up_read(&adev->reset_domain->sem); + } +} + static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, @@ -312,26 +324,42 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { enum idh_event event = xgpu_ai_mailbox_peek_msg(adev); + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); switch (event) { - case IDH_FLR_NOTIFICATION: + case IDH_RAS_BAD_PAGES_NOTIFICATION: + xgpu_ai_mailbox_send_ack(adev); + if (amdgpu_sriov_runtime(adev)) + schedule_work(&adev->virt.bad_pages_work); + break; + case IDH_UNRECOV_ERR_NOTIFICATION: + xgpu_ai_mailbox_send_ack(adev); + ras->is_rma = true; + dev_err(adev->dev, "VF is in an unrecoverable state. Runtime Services are halted.\n"); if (amdgpu_sriov_runtime(adev)) WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, - &adev->virt.flr_work), - "Failed to queue work! at %s", - __func__); + &adev->virt.flr_work), + "Failed to queue work! at %s", + __func__); break; - case IDH_QUERY_ALIVE: - xgpu_ai_mailbox_send_ack(adev); - break; - /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore - * it byfar since that polling thread will handle it, - * other msg like flr complete is not handled here. - */ - case IDH_CLR_MSG_BUF: - case IDH_FLR_NOTIFICATION_CMPL: - case IDH_READY_TO_ACCESS_GPU: - default: + case IDH_FLR_NOTIFICATION: + if (amdgpu_sriov_runtime(adev)) + WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->virt.flr_work), + "Failed to queue work! at %s", + __func__); + break; + case IDH_QUERY_ALIVE: + xgpu_ai_mailbox_send_ack(adev); + break; + /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore + * it byfar since that polling thread will handle it, + * other msg like flr complete is not handled here. 
+ */ + case IDH_CLR_MSG_BUF: + case IDH_FLR_NOTIFICATION_CMPL: + case IDH_READY_TO_ACCESS_GPU: + default: break; } @@ -387,6 +415,7 @@ int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev) } INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work); + INIT_WORK(&adev->virt.bad_pages_work, xgpu_ai_mailbox_bad_pages_work); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index ed57cbc150af..874b9f8f9804 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -40,6 +40,7 @@ enum idh_request { IDH_LOG_VF_ERROR = 200, IDH_READY_TO_RESET = 201, IDH_RAS_POISON = 202, + IDH_REQ_RAS_BAD_PAGES = 205, }; enum idh_event { @@ -54,6 +55,9 @@ enum idh_event { IDH_RAS_POISON_READY, IDH_PF_SOFT_FLR_NOTIFICATION, IDH_RAS_ERROR_DETECTED, + IDH_RAS_BAD_PAGES_READY = 15, + IDH_RAS_BAD_PAGES_NOTIFICATION = 16, + IDH_UNRECOV_ERR_NOTIFICATION = 17, IDH_TEXT_MESSAGE = 255, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 5aadf24cb202..f6d8597452ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -67,6 +67,8 @@ static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev, reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0); if (reg == IDH_FAIL) r = -EINVAL; + if (reg == IDH_UNRECOV_ERR_NOTIFICATION) + r = -ENODEV; else if (reg != event) return -ENOENT; @@ -103,6 +105,7 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event) { int r; uint64_t timeout, now; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); now = (uint64_t)ktime_to_ms(ktime_get()); timeout = now + NV_MAILBOX_POLL_MSG_TIMEDOUT; @@ -110,8 +113,16 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event) do { r = xgpu_nv_mailbox_rcv_msg(adev, event); if (!r) { - dev_dbg(adev->dev, "rcv_msg 0x%x after %llu ms\n", event, NV_MAILBOX_POLL_MSG_TIMEDOUT - timeout + now); + dev_dbg(adev->dev, "rcv_msg 0x%x after %llu ms\n", + event, NV_MAILBOX_POLL_MSG_TIMEDOUT - timeout + now); return 0; + } else if (r == -ENODEV) { + if (!amdgpu_ras_is_rma(adev)) { + ras->is_rma = true; + dev_err(adev->dev, "VF is in an unrecoverable state. 
" + "Runtime Services are halted.\n"); + } + return r; } msleep(10); @@ -166,6 +177,10 @@ static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev, enum idh_event event = -1; send_request: + + if (amdgpu_ras_is_rma(adev)) + return -ENODEV; + xgpu_nv_mailbox_trans_msg(adev, req, data1, data2, data3); switch (req) { @@ -187,6 +202,9 @@ send_request: case IDH_REQ_RAS_CPER_DUMP: event = IDH_RAS_CPER_DUMP_READY; break; + case IDH_REQ_RAS_BAD_PAGES: + event = IDH_RAS_BAD_PAGES_READY; + break; default: break; } @@ -320,6 +338,7 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) { struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + struct amdgpu_reset_context reset_context = { 0 }; amdgpu_virt_fini_data_exchange(adev); @@ -330,8 +349,6 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT || adev->compute_timeout == MAX_SCHEDULE_TIMEOUT || adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) { - struct amdgpu_reset_context reset_context; - memset(&reset_context, 0, sizeof(reset_context)); reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; @@ -342,6 +359,19 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) } } +static void xgpu_nv_mailbox_bad_pages_work(struct work_struct *work) +{ + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, bad_pages_work); + struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + + if (down_read_trylock(&adev->reset_domain->sem)) { + amdgpu_virt_fini_data_exchange(adev); + amdgpu_virt_request_bad_pages(adev); + amdgpu_virt_init_data_exchange(adev); + up_read(&adev->reset_domain->sem); + } +} + static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, @@ -364,8 +394,27 @@ static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { enum idh_event event = xgpu_nv_mailbox_peek_msg(adev); + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); switch (event) { + case IDH_RAS_BAD_PAGES_NOTIFICATION: + xgpu_nv_mailbox_send_ack(adev); + if (amdgpu_sriov_runtime(adev)) + schedule_work(&adev->virt.bad_pages_work); + break; + case IDH_UNRECOV_ERR_NOTIFICATION: + xgpu_nv_mailbox_send_ack(adev); + if (!amdgpu_ras_is_rma(adev)) { + ras->is_rma = true; + dev_err(adev->dev, "VF is in an unrecoverable state. Runtime Services are halted.\n"); + } + + if (amdgpu_sriov_runtime(adev)) + WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->virt.flr_work), + "Failed to queue work! 
at %s", + __func__); + break; case IDH_FLR_NOTIFICATION: if (amdgpu_sriov_runtime(adev)) WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, @@ -436,6 +485,7 @@ int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev) } INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work); + INIT_WORK(&adev->virt.bad_pages_work, xgpu_nv_mailbox_bad_pages_work); return 0; } @@ -480,6 +530,11 @@ static int xgpu_nv_req_ras_cper_dump(struct amdgpu_device *adev, u64 vf_rptr) adev, IDH_REQ_RAS_CPER_DUMP, vf_rptr_hi, vf_rptr_lo, 0); } +static int xgpu_nv_req_ras_bad_pages(struct amdgpu_device *adev) +{ + return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_BAD_PAGES); +} + const struct amdgpu_virt_ops xgpu_nv_virt_ops = { .req_full_gpu = xgpu_nv_request_full_gpu_access, .rel_full_gpu = xgpu_nv_release_full_gpu_access, @@ -492,4 +547,5 @@ const struct amdgpu_virt_ops xgpu_nv_virt_ops = { .rcvd_ras_intr = xgpu_nv_rcvd_ras_intr, .req_ras_err_count = xgpu_nv_req_ras_err_count, .req_ras_cper_dump = xgpu_nv_req_ras_cper_dump, + .req_bad_pages = xgpu_nv_req_ras_bad_pages, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index 72c9fceb9d79..5808689562cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -42,6 +42,7 @@ enum idh_request { IDH_RAS_POISON = 202, IDH_REQ_RAS_ERROR_COUNT = 203, IDH_REQ_RAS_CPER_DUMP = 204, + IDH_REQ_RAS_BAD_PAGES = 205, }; enum idh_event { @@ -58,6 +59,9 @@ enum idh_event { IDH_RAS_ERROR_DETECTED, IDH_RAS_ERROR_COUNT_READY = 11, IDH_RAS_CPER_DUMP_READY = 14, + IDH_RAS_BAD_PAGES_READY = 15, + IDH_RAS_BAD_PAGES_NOTIFICATION = 16, + IDH_UNRECOV_ERR_NOTIFICATION = 17, IDH_TEXT_MESSAGE = 255, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index f23cb79110d6..a376f072700d 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -177,8 +177,12 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do { u32 doorbell_range = 0, doorbell_ctrl = 0; u32 aid_id = instance; + u32 range_size; if (use_doorbell) { + range_size = (amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(9, 5, 0)) ? 
+ 0xb : 0x9; doorbell_range = REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0, BIF_DOORBELL0_RANGE_OFFSET_ENTRY, @@ -186,7 +190,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do doorbell_range = REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0, BIF_DOORBELL0_RANGE_SIZE_ENTRY, - 0x9); + range_size); if (aid_id) doorbell_range = REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0, @@ -204,7 +208,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x4); doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, - S2A_DOORBELL_PORT1_RANGE_SIZE, 0x9); + S2A_DOORBELL_PORT1_RANGE_SIZE, range_size); doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0x4); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index fcd708eae75c..80153f837470 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -34,9 +34,6 @@ #include "sdma0/sdma0_4_0_offset.h" #include "nbio/nbio_7_4_offset.h" -#include "oss/osssys_4_0_offset.h" -#include "oss/osssys_4_0_sh_mask.h" - MODULE_FIRMWARE("amdgpu/renoir_asd.bin"); MODULE_FIRMWARE("amdgpu/renoir_ta.bin"); MODULE_FIRMWARE("amdgpu/green_sardine_asd.bin"); @@ -99,9 +96,6 @@ static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp) WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), 0x80000000, 0x80000000, false); @@ -138,8 +132,6 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp) WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0, true); @@ -147,37 +139,6 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp) return ret; } -static void psp_v12_0_reroute_ih(struct psp_context *psp) -{ - struct amdgpu_device *adev = psp->adev; - uint32_t tmp; - - /* Change IH ring for VMC */ - tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); - - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); - - mdelay(20); - psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); - - /* Change IH ring for UMC */ - tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); - - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); - - mdelay(20); - psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); -} - static int psp_v12_0_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -186,49 +147,23 @@ static int psp_v12_0_ring_create(struct psp_context *psp, struct psp_ring *ring = &psp->km_ring; struct amdgpu_device *adev = psp->adev; - psp_v12_0_reroute_ih(psp); - - 
if (amdgpu_sriov_vf(psp->adev)) { - /* Write low address of the ring to C2PMSG_102 */ - psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); - /* Write high address of the ring to C2PMSG_103 */ - psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); - - /* Write the ring initialization command to C2PMSG_101 */ - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, - GFX_CTRL_CMD_ID_INIT_GPCOM_RING); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - /* Wait for response flag (bit 31) in C2PMSG_101 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x8000FFFF, false); - - } else { - /* Write low address of the ring to C2PMSG_69 */ - psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); - /* Write high address of the ring to C2PMSG_70 */ - psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); - /* Write size of ring to C2PMSG_71 */ - psp_ring_reg = ring->ring_size; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); - /* Write the ring initialization command to C2PMSG_64 */ - psp_ring_reg = ring_type; - psp_ring_reg = psp_ring_reg << 16; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); - } + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); return ret; } @@ -247,9 +182,6 @@ static int psp_v12_0_ring_stop(struct psp_context *psp, WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_DESTROY_RINGS); - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - /* Wait for response flag (bit 31) */ if (amdgpu_sriov_vf(adev)) ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index afdf8ce3b4c5..f8af2cc63446 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -85,6 +85,8 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_4_ta.bin"); #define regMP1_PUB_SCRATCH0 0x3b10090 +#define PSP13_BL_STATUS_SIZE 100 + static int psp_v13_0_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -151,6 +153,32 @@ static bool psp_v13_0_is_sos_alive(struct psp_context *psp) return sol_reg != 0x0; } +static void psp_v13_0_bootloader_print_status(struct psp_context *psp, + const char *msg) +{ + struct 
amdgpu_device *adev = psp->adev; + u32 bl_status_reg; + char bl_status_msg[PSP13_BL_STATUS_SIZE]; + int i, at; + + if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) { + at = 0; + for_each_inst(i, adev->aid_mask) { + bl_status_reg = + (SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_92) + << 2) + + adev->asic_funcs->encode_ext_smn_addressing(i); + at += snprintf(bl_status_msg + at, + PSP13_BL_STATUS_SIZE - at, + " status(%02i): 0x%08x", i, + RREG32_PCIE_EXT(bl_status_reg)); + } + dev_info(adev->dev, "%s - %s", msg, bl_status_msg); + } +} + static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -196,6 +224,9 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) if (ret == 0) return 0; + if (retry_loop && !(retry_loop % 10)) + psp_v13_0_bootloader_print_status( + psp, "Waiting for bootloader completion"); } return ret; @@ -858,6 +889,25 @@ static bool psp_v13_0_is_reload_needed(struct psp_context *psp) return false; } +static int psp_v13_0_reg_program_no_ring(struct psp_context *psp, uint32_t val, + enum psp_reg_prog_id id) +{ + struct amdgpu_device *adev = psp->adev; + int ret = -EOPNOTSUPP; + + /* PSP will broadcast the value to all instances */ + if (amdgpu_sriov_vf(adev)) { + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_GBR_IH_SET); + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, id); + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_103, val); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + } + + return ret; +} + static const struct psp_funcs psp_v13_0_funcs = { .init_microcode = psp_v13_0_init_microcode, .wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state, @@ -884,6 +934,7 @@ static const struct psp_funcs psp_v13_0_funcs = { .get_ras_capability = psp_v13_0_get_ras_capability, .is_aux_sos_load_required = psp_v13_0_is_aux_sos_load_required, .is_reload_needed = psp_v13_0_is_reload_needed, + .reg_program_no_ring = psp_v13_0_reg_program_no_ring, }; void psp_v13_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 688a720bbbbd..9c169112a5e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -106,8 +106,9 @@ static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev); -static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev); +static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring); +static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring); static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 offset) @@ -1333,6 +1334,11 @@ static bool sdma_v4_4_2_fw_support_paging_queue(struct amdgpu_device *adev) } } +static const struct amdgpu_sdma_funcs sdma_v4_4_2_sdma_funcs = { + .stop_kernel_queue = &sdma_v4_4_2_stop_queue, + .start_kernel_queue = &sdma_v4_4_2_restore_queue, +}; + static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1351,8 +1357,6 @@ static int 
sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block) sdma_v4_4_2_set_vm_pte_funcs(adev); sdma_v4_4_2_set_irq_funcs(adev); sdma_v4_4_2_set_ras_funcs(adev); - sdma_v4_4_2_set_engine_reset_funcs(adev); - return 0; } @@ -1447,6 +1451,7 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block) /* Initialize guilty flags for GFX and PAGE queues */ adev->sdma.instance[i].gfx_guilty = false; adev->sdma.instance[i].page_guilty = false; + adev->sdma.instance[i].funcs = &sdma_v4_4_2_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; @@ -1678,11 +1683,12 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) return r; } -static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_id) +static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + u32 instance_id = GET_INST(SDMA0, ring->me); u32 inst_mask; uint64_t rptr; - struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring; if (amdgpu_sriov_vf(adev)) return -EINVAL; @@ -1715,11 +1721,11 @@ static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_ return 0; } -static int sdma_v4_4_2_restore_queue(struct amdgpu_device *adev, uint32_t instance_id) +static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring) { - int i; + struct amdgpu_device *adev = ring->adev; u32 inst_mask; - struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring; + int i; inst_mask = 1 << ring->me; udelay(50); @@ -1739,16 +1745,6 @@ static int sdma_v4_4_2_restore_queue(struct amdgpu_device *adev, uint32_t instan return sdma_v4_4_2_inst_start(adev, inst_mask, true); } -static struct sdma_on_reset_funcs sdma_v4_4_2_engine_reset_funcs = { - .pre_reset = sdma_v4_4_2_stop_queue, - .post_reset = sdma_v4_4_2_restore_queue, -}; - -static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev) -{ - amdgpu_sdma_register_on_reset_callbacks(adev, &sdma_v4_4_2_engine_reset_funcs); -} - static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -2373,7 +2369,9 @@ static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev) adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; break; case IP_VERSION(9, 5, 0): - /*TODO: enable the queue reset flag until fw supported */ + if ((adev->gfx.mec_fw_version >= 0xf) && amdgpu_dpm_reset_sdma_is_supported(adev)) + adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 0dce59f4f6e2..9505ae96fbec 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -112,6 +112,8 @@ static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev); +static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring); +static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring); static const struct soc15_reg_golden golden_settings_sdma_5[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107), @@ -369,67 +371,36 @@ static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - 
uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; DRM_DEBUG("Setting write pointer\n"); - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - AMDGPU_MES_PRIORITY_LEVEL_NORMAL); - + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - *wptr_saved = ring->wptr << 2; - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, - ring->wptr << 2); - } + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("Not using doorbell -- " - "mmSDMA%i_GFX_RB_WPTR == 0x%08x " - "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, - ring->me, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, - ring->me, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); - } + DRM_DEBUG("Not using doorbell -- " + "mmSDMA%i_GFX_RB_WPTR == 0x%08x " + "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, + ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, + ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } } @@ -575,11 +546,9 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if (flags & AMDGPU_FENCE_FLAG_INT) { - uint32_t ctx = ring->is_mes_queue ? 
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -588,15 +557,15 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se * sdma_v5_0_gfx_stop - stop the gfx async dma engines * * @adev: amdgpu_device pointer - * + * @inst_mask: mask of dma engine instances to be disabled * Stop the gfx async dma ring buffers (NAVI10). */ -static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev) +static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask) { u32 rb_cntl, ib_cntl; int i; - for (i = 0; i < adev->sdma.num_instances; i++) { + for_each_inst(i, inst_mask) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); @@ -688,9 +657,11 @@ static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable) { u32 f32_cntl; int i; + uint32_t inst_mask; + inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); if (!enable) { - sdma_v5_0_gfx_stop(adev); + sdma_v5_0_gfx_stop(adev, 1 << inst_mask); sdma_v5_0_rlc_stop(adev); } @@ -1046,33 +1017,22 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 20); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1085,10 +1045,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -1100,8 +1057,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1124,38 +1080,24 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void 
*)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(adev, NULL, 256, - AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, + AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | @@ -1183,10 +1125,7 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1197,8 +1136,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1389,6 +1327,36 @@ static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); } +static int sdma_v5_0_soft_reset_engine(struct amdgpu_device *adev, u32 instance_id) +{ + u32 grbm_soft_reset; + u32 tmp; + + grbm_soft_reset = REG_SET_FIELD(0, + GRBM_SOFT_RESET, SOFT_RESET_SDMA0, + 1); + grbm_soft_reset <<= instance_id; + + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + + udelay(50); + + tmp &= ~grbm_soft_reset; + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + return 0; +} + +static const struct amdgpu_sdma_funcs sdma_v5_0_sdma_funcs = { + .stop_kernel_queue = &sdma_v5_0_stop_queue, + .start_kernel_queue = &sdma_v5_0_restore_queue, + .soft_reset_kernel_queue = &sdma_v5_0_soft_reset_engine, +}; + static int sdma_v5_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1431,6 +1399,7 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block) return r; for (i = 0; i < adev->sdma.num_instances; i++) { + adev->sdma.instance[i].funcs = &sdma_v5_0_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; ring->use_doorbell = true; @@ -1572,32 +1541,25 @@ static int sdma_v5_0_soft_reset(struct amdgpu_ip_block *ip_block) static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; - int i, j, r; - u32 rb_cntl, ib_cntl, f32_cntl, freeze, cntl, preempt, soft_reset, stat1_reg; + u32 inst_id = ring->me; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + return amdgpu_sdma_reset_engine(adev, inst_id); +} - for (i = 0; i < adev->sdma.num_instances; i++) { - if (ring == &adev->sdma.instance[i].ring) - break; - } +static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring) +{ + u32 f32_cntl, freeze, 
cntl, stat1_reg; + struct amdgpu_device *adev = ring->adev; + int i, j, r = 0; - if (i == adev->sdma.num_instances) { - DRM_ERROR("sdma instance not found\n"); + if (amdgpu_sriov_vf(adev)) return -EINVAL; - } + i = ring->me; amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* stop queue */ - ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); - - rb_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + sdma_v5_0_gfx_stop(adev, 1 << i); /* engine stop SDMA1_F32_CNTL.HALT to 1 and SDMAx_FREEZE freeze bit to 1 */ freeze = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE)); @@ -1628,30 +1590,25 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0); WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl); +err0: + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; +} - /* soft reset SDMA_GFX_PREEMPT.IB_PREEMPT = 0 mmGRBM_SOFT_RESET.SOFT_RESET_SDMA0/1 = 1 */ - preempt = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT)); - preempt = REG_SET_FIELD(preempt, SDMA0_GFX_PREEMPT, IB_PREEMPT, 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT), preempt); - - soft_reset = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - soft_reset |= 1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i; - - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); - - udelay(50); - - soft_reset &= ~(1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i); - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); +static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 inst_id = ring->me; + u32 freeze; + int r; + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* unfreeze*/ - freeze = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE)); + freeze = RREG32(sdma_v5_0_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE)); freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze); + WREG32(sdma_v5_0_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE), freeze); - r = sdma_v5_0_gfx_resume_instance(adev, i, true); - -err0: + r = sdma_v5_0_gfx_resume_instance(adev, inst_id, true); amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 2b39a03ff0c1..a6e612b4a892 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -113,6 +113,8 @@ static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev); +static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring); +static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring); static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) { @@ -394,11 +396,9 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if ((flags & AMDGPU_FENCE_FLAG_INT)) { - uint32_t ctx = ring->is_mes_queue ? 
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -407,15 +407,15 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se * sdma_v5_2_gfx_stop - stop the gfx async dma engines * * @adev: amdgpu_device pointer - * + * @inst_mask: mask of dma engine instances to be disabled * Stop the gfx async dma ring buffers. */ -static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev) +static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask) { u32 rb_cntl, ib_cntl; int i; - for (i = 0; i < adev->sdma.num_instances; i++) { + for_each_inst(i, inst_mask) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); @@ -506,9 +506,11 @@ static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable) { u32 f32_cntl; int i; + uint32_t inst_mask; + inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); if (!enable) { - sdma_v5_2_gfx_stop(adev); + sdma_v5_2_gfx_stop(adev, inst_mask); sdma_v5_2_rlc_stop(adev); } @@ -761,37 +763,49 @@ static int sdma_v5_2_load_microcode(struct amdgpu_device *adev) return 0; } -static int sdma_v5_2_soft_reset(struct amdgpu_ip_block *ip_block) +static int sdma_v5_2_soft_reset_engine(struct amdgpu_device *adev, u32 instance_id) { - struct amdgpu_device *adev = ip_block->adev; u32 grbm_soft_reset; u32 tmp; - int i; - for (i = 0; i < adev->sdma.num_instances; i++) { - grbm_soft_reset = REG_SET_FIELD(0, - GRBM_SOFT_RESET, SOFT_RESET_SDMA0, - 1); - grbm_soft_reset <<= i; + grbm_soft_reset = REG_SET_FIELD(0, + GRBM_SOFT_RESET, SOFT_RESET_SDMA0, + 1); + grbm_soft_reset <<= instance_id; - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - udelay(50); + udelay(50); + + tmp &= ~grbm_soft_reset; + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + return 0; +} - tmp &= ~grbm_soft_reset; - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); +static int sdma_v5_2_soft_reset(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i; + for (i = 0; i < adev->sdma.num_instances; i++) { + sdma_v5_2_soft_reset_engine(adev, i); udelay(50); } return 0; } +static const struct amdgpu_sdma_funcs sdma_v5_2_sdma_funcs = { + .stop_kernel_queue = &sdma_v5_2_stop_queue, + .start_kernel_queue = &sdma_v5_2_restore_queue, + .soft_reset_kernel_queue = &sdma_v5_2_soft_reset_engine, +}; + /** * sdma_v5_2_start - setup and start the async dma engines * @@ -903,33 +917,22 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = 
amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 20); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -942,10 +945,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -957,8 +957,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -981,37 +980,23 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | @@ -1039,10 +1024,7 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1053,8 +1035,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1337,6 +1318,7 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block) } for (i = 0; i < adev->sdma.num_instances; i++) { + adev->sdma.instance[i].funcs = &sdma_v5_2_sdma_funcs; ring = 
&adev->sdma.instance[i].ring; ring->ring_obj = NULL; ring->use_doorbell = true; @@ -1472,32 +1454,25 @@ static int sdma_v5_2_wait_for_idle(struct amdgpu_ip_block *ip_block) static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; - int i, j, r; - u32 rb_cntl, ib_cntl, f32_cntl, freeze, cntl, preempt, soft_reset, stat1_reg; + u32 inst_id = ring->me; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + return amdgpu_sdma_reset_engine(adev, inst_id); +} - for (i = 0; i < adev->sdma.num_instances; i++) { - if (ring == &adev->sdma.instance[i].ring) - break; - } +static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring) +{ + u32 f32_cntl, freeze, cntl, stat1_reg; + struct amdgpu_device *adev = ring->adev; + int i, j, r = 0; - if (i == adev->sdma.num_instances) { - DRM_ERROR("sdma instance not found\n"); + if (amdgpu_sriov_vf(adev)) return -EINVAL; - } + i = ring->me; amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* stop queue */ - ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); - - rb_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + sdma_v5_2_gfx_stop(adev, 1 << i); /*engine stop SDMA1_F32_CNTL.HALT to 1 and SDMAx_FREEZE freeze bit to 1 */ freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE)); @@ -1530,31 +1505,26 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0); WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl); - /* soft reset SDMA_GFX_PREEMPT.IB_PREEMPT = 0 mmGRBM_SOFT_RESET.SOFT_RESET_SDMA0/1 = 1 */ - preempt = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT)); - preempt = REG_SET_FIELD(preempt, SDMA0_GFX_PREEMPT, IB_PREEMPT, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT), preempt); - - soft_reset = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - soft_reset |= 1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i; - - - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); - - udelay(50); - - soft_reset &= ~(1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i); +err0: + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; +} - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); +static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 inst_id = ring->me; + u32 freeze; + int r; + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* unfreeze and unhalt */ - freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE)); + freeze = RREG32(sdma_v5_2_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE)); freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze); + WREG32(sdma_v5_2_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE), freeze); - r = sdma_v5_2_gfx_resume_instance(adev, i, true); + r = sdma_v5_2_gfx_resume_instance(adev, inst_id, true); -err0: amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index c214c3d2149b..da5b5d64f137 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -43,6 +43,7 @@ #include "sdma_common.h" 
#include "sdma_v6_0.h" #include "v11_structs.h" +#include "mes_userqueue.h" MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin"); @@ -376,11 +377,9 @@ static void sdma_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if (flags & AMDGPU_FENCE_FLAG_INT) { - uint32_t ctx = ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -891,6 +890,9 @@ static int sdma_v6_0_mqd_init(struct amdgpu_device *adev, void *mqd, m->sdmax_rlcx_rb_aql_cntl = regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT; m->sdmax_rlcx_dummy_reg = regSDMA0_QUEUE0_DUMMY_REG_DEFAULT; + m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr); + m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr); + return 0; } @@ -917,33 +919,22 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -956,10 +947,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -971,8 +959,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -995,37 +982,23 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); 
+ if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | @@ -1053,10 +1026,7 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1067,8 +1037,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1300,6 +1269,23 @@ static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int r; + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = true; + break; + case 1: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = false; + break; + case 2: + adev->sdma.no_user_submission = true; + adev->sdma.disable_uq = false; + break; + } + r = amdgpu_sdma_init_microcode(adev, 0, true); if (r) return r; @@ -1334,6 +1320,7 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) ring->ring_obj = NULL; ring->use_doorbell = true; ring->me = i; + ring->no_user_submission = adev->sdma.no_user_submission; DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i, ring->use_doorbell?"true":"false"); @@ -1376,6 +1363,10 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + /* add firmware version checks here */ + if (0 && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) return r; @@ -1399,11 +1390,39 @@ static int sdma_v6_0_sw_fini(struct amdgpu_ip_block *ip_block) return 0; } +static int sdma_v6_0_set_userq_trap_interrupts(struct amdgpu_device *adev, + bool enable) +{ + unsigned int irq_type; + int i, r; + + if (adev->userq_funcs[AMDGPU_HW_IP_DMA]) { + for (i = 0; i < adev->sdma.num_instances; i++) { + irq_type = AMDGPU_SDMA_IRQ_INSTANCE0 + i; + if (enable) + r = amdgpu_irq_get(adev, &adev->sdma.trap_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->sdma.trap_irq, + irq_type); + if (r) + return r; + } + } + + return 0; +} + static int sdma_v6_0_hw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + int r; - return sdma_v6_0_start(adev); + r = sdma_v6_0_start(adev); + if (r) + return r; + + return sdma_v6_0_set_userq_trap_interrupts(adev, true); } static int sdma_v6_0_hw_fini(struct amdgpu_ip_block *ip_block) @@ -1415,6 +1434,7 @@ static int sdma_v6_0_hw_fini(struct amdgpu_ip_block *ip_block) sdma_v6_0_ctxempty_int_enable(adev, false); sdma_v6_0_enable(adev, false); + sdma_v6_0_set_userq_trap_interrupts(adev, false); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index b2706221df99..befe013b11a7 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -42,6 +42,7 @@ #include "sdma_common.h" #include "sdma_v7_0.h" #include "v12_structs.h" +#include "mes_userqueue.h" MODULE_FIRMWARE("amdgpu/sdma_7_0_0.bin"); MODULE_FIRMWARE("amdgpu/sdma_7_0_1.bin"); @@ -204,66 +205,39 @@ static uint64_t sdma_v7_0_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v7_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; DRM_DEBUG("Setting write pointer\n"); - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - *wptr_saved = ring->wptr << 2; - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("Not using doorbell -- " - "regSDMA%i_GFX_RB_WPTR == 0x%08x " - "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, - ring->me, - regSDMA0_QUEUE0_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, - ring->me, - regSDMA0_QUEUE0_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); - } + DRM_DEBUG("Not using doorbell -- " + "regSDMA%i_GFX_RB_WPTR == 0x%08x " + "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, + ring->me, + regSDMA0_QUEUE0_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, + ring->me, + regSDMA0_QUEUE0_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } } @@ -407,11 +381,9 @@ static void sdma_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if (flags & AMDGPU_FENCE_FLAG_INT) { - uint32_t ctx = 
ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -935,6 +907,9 @@ static int sdma_v7_0_mqd_init(struct amdgpu_device *adev, void *mqd, m->sdmax_rlcx_rb_aql_cntl = 0x4000; //regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT; m->sdmax_rlcx_dummy_reg = 0xf; //regSDMA0_QUEUE0_DUMMY_REG_DEFAULT; + m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr); + m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr); + return 0; } @@ -961,33 +936,22 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1000,10 +964,7 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -1015,8 +976,7 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1039,37 +999,23 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(adev, NULL, 
256, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | @@ -1097,10 +1043,7 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1111,8 +1054,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1312,6 +1254,23 @@ static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int r; + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = true; + break; + case 1: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = false; + break; + case 2: + adev->sdma.no_user_submission = true; + adev->sdma.disable_uq = false; + break; + } + r = amdgpu_sdma_init_microcode(adev, 0, true); if (r) { DRM_ERROR("Failed to init sdma firmware!\n"); @@ -1347,6 +1306,7 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) ring->ring_obj = NULL; ring->use_doorbell = true; ring->me = i; + ring->no_user_submission = adev->sdma.no_user_submission; DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i, ring->use_doorbell?"true":"false"); @@ -1378,6 +1338,10 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + /* add firmware version checks here */ + if (0 && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + return r; } @@ -1400,11 +1364,39 @@ static int sdma_v7_0_sw_fini(struct amdgpu_ip_block *ip_block) return 0; } +static int sdma_v7_0_set_userq_trap_interrupts(struct amdgpu_device *adev, + bool enable) +{ + unsigned int irq_type; + int i, r; + + if (adev->userq_funcs[AMDGPU_HW_IP_DMA]) { + for (i = 0; i < adev->sdma.num_instances; i++) { + irq_type = AMDGPU_SDMA_IRQ_INSTANCE0 + i; + if (enable) + r = amdgpu_irq_get(adev, &adev->sdma.trap_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->sdma.trap_irq, + irq_type); + if (r) + return r; + } + } + + return 0; +} + static int sdma_v7_0_hw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + int r; - return sdma_v7_0_start(adev); + r = sdma_v7_0_start(adev); + if (r) + return r; + + return sdma_v7_0_set_userq_trap_interrupts(adev, true); } static int sdma_v7_0_hw_fini(struct amdgpu_ip_block *ip_block) @@ -1416,6 +1408,7 @@ static int sdma_v7_0_hw_fini(struct amdgpu_ip_block *ip_block) sdma_v7_0_ctx_switch_enable(adev, false); sdma_v7_0_enable(adev, false); + sdma_v7_0_set_userq_trap_interrupts(adev, false); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 2247f6a94858..e0f139de7991 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -35,6 +35,7 @@ #include "amdgpu_vce.h" #include "atom.h" #include "amd_pcie.h" + #include "si_dpm.h" #include "sid.h" #include "si_ih.h" @@ -44,17 +45,30 @@ #include "dce_v6_0.h" #include "si.h" #include "uvd_v3_1.h" -#include "amdgpu_vkms.h" + +#include 
"uvd/uvd_4_0_d.h" + +#include "smu/smu_6_0_d.h" +#include "smu/smu_6_0_sh_mask.h" + #include "gca/gfx_6_0_d.h" +#include "gca/gfx_6_0_sh_mask.h" + #include "oss/oss_1_0_d.h" #include "oss/oss_1_0_sh_mask.h" + #include "gmc/gmc_6_0_d.h" +#include"gmc/gmc_6_0_sh_mask.h" + #include "dce/dce_6_0_d.h" -#include "uvd/uvd_4_0_d.h" +#include "dce/dce_6_0_sh_mask.h" + #include "bif/bif_3_0_d.h" #include "bif/bif_3_0_sh_mask.h" +#include "si_enums.h" #include "amdgpu_dm.h" +#include "amdgpu_vkms.h" static const u32 tahiti_golden_registers[] = { @@ -1071,8 +1085,8 @@ static u32 si_smc_rreg(struct amdgpu_device *adev, u32 reg) u32 r; spin_lock_irqsave(&adev->smc_idx_lock, flags); - WREG32(SMC_IND_INDEX_0, (reg)); - r = RREG32(SMC_IND_DATA_0); + WREG32(mmSMC_IND_INDEX_0, (reg)); + r = RREG32(mmSMC_IND_DATA_0); spin_unlock_irqrestore(&adev->smc_idx_lock, flags); return r; } @@ -1082,8 +1096,8 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) unsigned long flags; spin_lock_irqsave(&adev->smc_idx_lock, flags); - WREG32(SMC_IND_INDEX_0, (reg)); - WREG32(SMC_IND_DATA_0, (v)); + WREG32(mmSMC_IND_INDEX_0, (reg)); + WREG32(mmSMC_IND_DATA_0, (v)); spin_unlock_irqrestore(&adev->smc_idx_lock, flags); } @@ -1110,20 +1124,20 @@ static void si_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v) } static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { - {GRBM_STATUS}, + {mmGRBM_STATUS}, {mmGRBM_STATUS2}, {mmGRBM_STATUS_SE0}, {mmGRBM_STATUS_SE1}, {mmSRBM_STATUS}, {mmSRBM_STATUS2}, - {DMA_STATUS_REG + DMA0_REGISTER_OFFSET}, - {DMA_STATUS_REG + DMA1_REGISTER_OFFSET}, + {mmDMA_STATUS_REG + DMA0_REGISTER_OFFSET}, + {mmDMA_STATUS_REG + DMA1_REGISTER_OFFSET}, {mmCP_STAT}, {mmCP_STALLED_STAT1}, {mmCP_STALLED_STAT2}, {mmCP_STALLED_STAT3}, - {GB_ADDR_CONFIG}, - {MC_ARB_RAMCFG}, + {mmGB_ADDR_CONFIG}, + {mmMC_ARB_RAMCFG}, {mmGB_TILE_MODE0}, {mmGB_TILE_MODE1}, {mmGB_TILE_MODE2}, @@ -1156,7 +1170,7 @@ static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { {mmGB_TILE_MODE29}, {mmGB_TILE_MODE30}, {mmGB_TILE_MODE31}, - {CC_RB_BACKEND_DISABLE, true}, + {mmCC_RB_BACKEND_DISABLE, true}, {mmGC_USER_RB_BACKEND_DISABLE, true}, {mmPA_SC_RASTER_CONFIG, true}, }; @@ -1264,37 +1278,37 @@ static bool si_read_disabled_bios(struct amdgpu_device *adev) u32 rom_cntl; bool r; - bus_cntl = RREG32(R600_BUS_CNTL); + bus_cntl = RREG32(mmBUS_CNTL); if (adev->mode_info.num_crtc) { - d1vga_control = RREG32(AVIVO_D1VGA_CONTROL); - d2vga_control = RREG32(AVIVO_D2VGA_CONTROL); - vga_render_control = RREG32(VGA_RENDER_CONTROL); + d1vga_control = RREG32(mmD1VGA_CONTROL); + d2vga_control = RREG32(mmD2VGA_CONTROL); + vga_render_control = RREG32(mmVGA_RENDER_CONTROL); } rom_cntl = RREG32(R600_ROM_CNTL); /* enable the rom */ - WREG32(R600_BUS_CNTL, (bus_cntl & ~R600_BIOS_ROM_DIS)); + WREG32(mmBUS_CNTL, (bus_cntl & ~BUS_CNTL__BIOS_ROM_DIS_MASK)); if (adev->mode_info.num_crtc) { /* Disable VGA mode */ - WREG32(AVIVO_D1VGA_CONTROL, - (d1vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE | - AVIVO_DVGA_CONTROL_TIMING_SELECT))); - WREG32(AVIVO_D2VGA_CONTROL, - (d2vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE | - AVIVO_DVGA_CONTROL_TIMING_SELECT))); - WREG32(VGA_RENDER_CONTROL, - (vga_render_control & C_000300_VGA_VSTATUS_CNTL)); + WREG32(mmD1VGA_CONTROL, + (d1vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK | + D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK))); + WREG32(mmD2VGA_CONTROL, + (d2vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK | + D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK))); + 
WREG32(mmVGA_RENDER_CONTROL, + (vga_render_control & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK)); } WREG32(R600_ROM_CNTL, rom_cntl | R600_SCK_OVERWRITE); r = amdgpu_read_bios(adev); /* restore regs */ - WREG32(R600_BUS_CNTL, bus_cntl); + WREG32(mmBUS_CNTL, bus_cntl); if (adev->mode_info.num_crtc) { - WREG32(AVIVO_D1VGA_CONTROL, d1vga_control); - WREG32(AVIVO_D2VGA_CONTROL, d2vga_control); - WREG32(VGA_RENDER_CONTROL, vga_render_control); + WREG32(mmD1VGA_CONTROL, d1vga_control); + WREG32(mmD2VGA_CONTROL, d2vga_control); + WREG32(mmVGA_RENDER_CONTROL, vga_render_control); } WREG32(R600_ROM_CNTL, rom_cntl); return r; @@ -1331,23 +1345,24 @@ static void si_set_clk_bypass_mode(struct amdgpu_device *adev) { u32 tmp, i; - tmp = RREG32(CG_SPLL_FUNC_CNTL); - tmp |= SPLL_BYPASS_EN; - WREG32(CG_SPLL_FUNC_CNTL, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL); + tmp |= CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN_MASK; + WREG32(mmCG_SPLL_FUNC_CNTL, tmp); - tmp = RREG32(CG_SPLL_FUNC_CNTL_2); - tmp |= SPLL_CTLREQ_CHG; - WREG32(CG_SPLL_FUNC_CNTL_2, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL_2); + tmp |= CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK; + WREG32(mmCG_SPLL_FUNC_CNTL_2, tmp); for (i = 0; i < adev->usec_timeout; i++) { - if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS) + if (RREG32(mmCG_SPLL_STATUS) & CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK) break; udelay(1); } - tmp = RREG32(CG_SPLL_FUNC_CNTL_2); - tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE); - WREG32(CG_SPLL_FUNC_CNTL_2, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL_2); + tmp &= ~(CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK | + CG_SPLL_FUNC_CNTL_2__SCLK_MUX_UPDATE_MASK); + WREG32(mmCG_SPLL_FUNC_CNTL_2, tmp); tmp = RREG32(MPLL_CNTL_MODE); tmp &= ~MPLL_MCLK_SEL; @@ -1358,21 +1373,21 @@ static void si_spll_powerdown(struct amdgpu_device *adev) { u32 tmp; - tmp = RREG32(SPLL_CNTL_MODE); - tmp |= SPLL_SW_DIR_CONTROL; - WREG32(SPLL_CNTL_MODE, tmp); + tmp = RREG32(mmSPLL_CNTL_MODE); + tmp |= SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK; + WREG32(mmSPLL_CNTL_MODE, tmp); - tmp = RREG32(CG_SPLL_FUNC_CNTL); - tmp |= SPLL_RESET; - WREG32(CG_SPLL_FUNC_CNTL, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL); + tmp |= CG_SPLL_FUNC_CNTL__SPLL_RESET_MASK; + WREG32(mmCG_SPLL_FUNC_CNTL, tmp); - tmp = RREG32(CG_SPLL_FUNC_CNTL); - tmp |= SPLL_SLEEP; - WREG32(CG_SPLL_FUNC_CNTL, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL); + tmp |= CG_SPLL_FUNC_CNTL__SPLL_SLEEP_MASK; + WREG32(mmCG_SPLL_FUNC_CNTL, tmp); - tmp = RREG32(SPLL_CNTL_MODE); - tmp &= ~SPLL_SW_DIR_CONTROL; - WREG32(SPLL_CNTL_MODE, tmp); + tmp = RREG32(mmSPLL_CNTL_MODE); + tmp &= ~SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK; + WREG32(mmSPLL_CNTL_MODE, tmp); } static int si_gpu_pci_config_reset(struct amdgpu_device *adev) @@ -1454,14 +1469,14 @@ static void si_vga_set_state(struct amdgpu_device *adev, bool state) { uint32_t temp; - temp = RREG32(CONFIG_CNTL); + temp = RREG32(mmCONFIG_CNTL); if (!state) { temp &= ~(1<<0); temp |= (1<<1); } else { temp &= ~(1<<1); } - WREG32(CONFIG_CNTL, temp); + WREG32(mmCONFIG_CNTL, temp); } static u32 si_get_xclk(struct amdgpu_device *adev) @@ -1469,12 +1484,12 @@ static u32 si_get_xclk(struct amdgpu_device *adev) u32 reference_clock = adev->clock.spll.reference_freq; u32 tmp; - tmp = RREG32(CG_CLKPIN_CNTL_2); - if (tmp & MUX_TCLK_TO_XCLK) + tmp = RREG32(mmCG_CLKPIN_CNTL_2); + if (tmp & CG_CLKPIN_CNTL_2__MUX_TCLK_TO_XCLK_MASK) return TCLK; - tmp = RREG32(CG_CLKPIN_CNTL); - if (tmp & XTALIN_DIVIDE) + tmp = RREG32(mmCG_CLKPIN_CNTL); + if (tmp & CG_CLKPIN_CNTL__XTALIN_DIVIDE_MASK) return reference_clock / 4; return reference_clock; 
@@ -1519,9 +1534,9 @@ static int si_get_pcie_lanes(struct amdgpu_device *adev) if (adev->flags & AMD_IS_APU) return 0; - link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); + link_width_cntl = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL); - switch ((link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT) { + switch ((link_width_cntl & PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK) >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT) { case LC_LINK_WIDTH_X1: return 1; case LC_LINK_WIDTH_X2: @@ -1568,13 +1583,13 @@ static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes) return; } - link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); - link_width_cntl &= ~LC_LINK_WIDTH_MASK; - link_width_cntl |= mask << LC_LINK_WIDTH_SHIFT; - link_width_cntl |= (LC_RECONFIG_NOW | - LC_RECONFIG_ARC_MISSING_ESCAPE); + link_width_cntl = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL); + link_width_cntl &= ~PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_MASK; + link_width_cntl |= mask << PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH__SHIFT; + link_width_cntl |= (PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_NOW_MASK | + PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_ARC_MISSING_ESCAPE_MASK); - WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); + WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); } static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, @@ -2018,7 +2033,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = static uint32_t si_get_rev_id(struct amdgpu_device *adev) { - return (RREG32(CC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK) + return (RREG32(mmCC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK) >> CC_DRM_ID_STRAPS__ATI_REV_ID__SHIFT; } @@ -2239,9 +2254,9 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3))) return; - speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); - current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >> - LC_CURRENT_DATA_RATE_SHIFT; + speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL); + current_data_rate = (speed_cntl & PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) >> + PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) { if (current_data_rate == 2) { DRM_INFO("PCIE gen 3 link speeds already enabled\n"); @@ -2268,17 +2283,17 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD); pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD); - tmp = RREG32_PCIE(PCIE_LC_STATUS1); - max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT; - current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT; + tmp = RREG32_PCIE(ixPCIE_LC_STATUS1); + max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >> PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH__SHIFT; + current_lw = (tmp & PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH_MASK) >> PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH__SHIFT; if (current_lw < max_lw) { - tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); - if (tmp & LC_RENEGOTIATION_SUPPORT) { - tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS); - tmp |= (max_lw << LC_LINK_WIDTH_SHIFT); - tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW; - WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp); + tmp = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL); + if (tmp & PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATION_SUPPORT_MASK) { + tmp &= 
~(PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_DIS_MASK); + tmp |= (max_lw << PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH__SHIFT); + tmp |= PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_SUPPORT_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATE_EN_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_NOW_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, tmp); } } @@ -2301,13 +2316,13 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) PCI_EXP_LNKCTL2, &gpu_cfg2); - tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); - tmp |= LC_SET_QUIESCE; - WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); + tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4); + tmp |= PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp); - tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); - tmp |= LC_REDO_EQ; - WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); + tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4); + tmp |= PCIE_LC_CNTL4__LC_REDO_EQ_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp); mdelay(100); @@ -2333,16 +2348,16 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) (PCI_EXP_LNKCTL2_ENTER_COMP | PCI_EXP_LNKCTL2_TX_MARGIN)); - tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); - tmp &= ~LC_SET_QUIESCE; - WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); + tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4); + tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp); } } } - speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE; - speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE; - WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl); + speed_cntl |= PCIE_LC_SPEED_CNTL__LC_FORCE_EN_SW_SPEED_CHANGE_MASK | PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_HW_SPEED_CHANGE_MASK; + speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL, speed_cntl); tmp16 = 0; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) @@ -2354,13 +2369,13 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2, PCI_EXP_LNKCTL2_TLS, tmp16); - speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); - speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE; - WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl); + speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL); + speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL, speed_cntl); for (i = 0; i < adev->usec_timeout; i++) { - speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); - if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0) + speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL); + if ((speed_cntl & PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK) == 0) break; udelay(1); } @@ -2418,121 +2433,121 @@ static void si_program_aspm(struct amdgpu_device *adev) if (!amdgpu_device_should_use_aspm(adev)) return; - orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL); - data &= ~LC_XMIT_N_FTS_MASK; - data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN; + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL); + data &= ~PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_MASK; + data |= (0x24 << PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS__SHIFT) | PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_OVERRIDE_EN_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL, data); - orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3); - data |= LC_GO_TO_RECOVERY; + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL3); + data |= PCIE_LC_CNTL3__LC_GO_TO_RECOVERY_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL3, data); + 
WREG32_PCIE_PORT(ixPCIE_LC_CNTL3, data); - orig = data = RREG32_PCIE(PCIE_P_CNTL); - data |= P_IGNORE_EDB_ERR; + orig = data = RREG32_PCIE(ixPCIE_P_CNTL); + data |= PCIE_P_CNTL__P_IGNORE_EDB_ERR_MASK; if (orig != data) - WREG32_PCIE(PCIE_P_CNTL, data); + WREG32_PCIE(ixPCIE_P_CNTL, data); - orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL); - data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK); - data |= LC_PMI_TO_L1_DIS; + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL); + data &= ~(PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK | PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK); + data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; if (!disable_l0s) - data |= LC_L0S_INACTIVITY(7); + data |= (7 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT); if (!disable_l1) { - data |= LC_L1_INACTIVITY(7); - data &= ~LC_PMI_TO_L1_DIS; + data |= (7 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT); + data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data); if (!disable_plloff_in_l1) { bool clk_req_support; - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0); - data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK); - data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7); + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_0); + data &= ~(PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK | PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK); + data |= (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) | (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT); if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_0, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_0, data); - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_1); - data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK); - data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7); + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_1); + data &= ~(PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK | PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK); + data |= (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) | (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT); if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_1, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_1, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_0); - data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK); - data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7); + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_0); + data &= ~(PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK | PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK); + data |= (7 << PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) | (7 << PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT); if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_0, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_0, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_1); - data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK); - data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7); + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_1); + data &= ~(PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK | PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK); + data |= (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) | (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT); if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data); + 
si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_1, data); if ((adev->asic_type != CHIP_OLAND) && (adev->asic_type != CHIP_HAINAN)) { - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0); - data &= ~PLL_RAMP_UP_TIME_0_MASK; + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_0); + data &= ~PB0_PIF_PWRDOWN_0__PLL_RAMP_UP_TIME_0_MASK; if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_0, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_0, data); - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_1); - data &= ~PLL_RAMP_UP_TIME_1_MASK; + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_1); + data &= ~PB0_PIF_PWRDOWN_1__PLL_RAMP_UP_TIME_1_MASK; if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_1, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_1, data); - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_2); - data &= ~PLL_RAMP_UP_TIME_2_MASK; + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_2); + data &= ~PB0_PIF_PWRDOWN_2__PLL_RAMP_UP_TIME_2_MASK; if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_2, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_2, data); - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_3); - data &= ~PLL_RAMP_UP_TIME_3_MASK; + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_3); + data &= ~PB0_PIF_PWRDOWN_3__PLL_RAMP_UP_TIME_3_MASK; if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_3, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_3, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_0); - data &= ~PLL_RAMP_UP_TIME_0_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_0); + data &= ~PB1_PIF_PWRDOWN_0__PLL_RAMP_UP_TIME_0_MASK; if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_0, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_0, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_1); - data &= ~PLL_RAMP_UP_TIME_1_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_1); + data &= ~PB1_PIF_PWRDOWN_1__PLL_RAMP_UP_TIME_1_MASK; if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_1, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_2); - data &= ~PLL_RAMP_UP_TIME_2_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_2); + data &= ~PB1_PIF_PWRDOWN_2__PLL_RAMP_UP_TIME_2_MASK; if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_2, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_2, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_3); - data &= ~PLL_RAMP_UP_TIME_3_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_3); + data &= ~PB1_PIF_PWRDOWN_3__PLL_RAMP_UP_TIME_3_MASK; if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_3, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_3, data); } - orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); - data &= ~LC_DYN_LANES_PWR_STATE_MASK; - data |= LC_DYN_LANES_PWR_STATE(3); + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL); + data &= ~PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE_MASK; + data |= (3 << PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE__SHIFT); if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, data); - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_CNTL); - data &= ~LS2_EXIT_TIME_MASK; + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_CNTL); + data &= ~PB0_PIF_CNTL__LS2_EXIT_TIME_MASK; if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN)) - data |= LS2_EXIT_TIME(5); + data |= (5 << PB0_PIF_CNTL__LS2_EXIT_TIME__SHIFT); 
if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_CNTL, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_CNTL, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_CNTL); - data &= ~LS2_EXIT_TIME_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_CNTL); + data &= ~PB1_PIF_CNTL__LS2_EXIT_TIME_MASK; if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN)) - data |= LS2_EXIT_TIME(5); + data |= (5 << PB1_PIF_CNTL__LS2_EXIT_TIME__SHIFT); if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_CNTL, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_CNTL, data); if (!disable_clkreq && !pci_is_root_bus(adev->pdev->bus)) { @@ -2548,64 +2563,64 @@ static void si_program_aspm(struct amdgpu_device *adev) } if (clk_req_support) { - orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2); - data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23; + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL2); + data |= PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK | PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL2, data); + WREG32_PCIE_PORT(ixPCIE_LC_CNTL2, data); - orig = data = RREG32(THM_CLK_CNTL); - data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK); - data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1); + orig = data = RREG32(mmTHM_CLK_CNTL); + data &= ~(THM_CLK_CNTL__CMON_CLK_SEL_MASK | THM_CLK_CNTL__TMON_CLK_SEL_MASK); + data |= (1 << THM_CLK_CNTL__CMON_CLK_SEL__SHIFT) | (1 << THM_CLK_CNTL__TMON_CLK_SEL__SHIFT); if (orig != data) - WREG32(THM_CLK_CNTL, data); + WREG32(mmTHM_CLK_CNTL, data); - orig = data = RREG32(MISC_CLK_CNTL); - data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK); - data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1); + orig = data = RREG32(mmMISC_CLK_CNTL); + data &= ~(MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL_MASK | MISC_CLK_CNTL__ZCLK_SEL_MASK); + data |= (1 << MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL__SHIFT) | (1 << MISC_CLK_CNTL__ZCLK_SEL__SHIFT); if (orig != data) - WREG32(MISC_CLK_CNTL, data); + WREG32(mmMISC_CLK_CNTL, data); - orig = data = RREG32(CG_CLKPIN_CNTL); - data &= ~BCLK_AS_XCLK; + orig = data = RREG32(mmCG_CLKPIN_CNTL); + data &= ~CG_CLKPIN_CNTL__BCLK_AS_XCLK_MASK; if (orig != data) - WREG32(CG_CLKPIN_CNTL, data); + WREG32(mmCG_CLKPIN_CNTL, data); - orig = data = RREG32(CG_CLKPIN_CNTL_2); - data &= ~FORCE_BIF_REFCLK_EN; + orig = data = RREG32(mmCG_CLKPIN_CNTL_2); + data &= ~CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN_MASK; if (orig != data) - WREG32(CG_CLKPIN_CNTL_2, data); + WREG32(mmCG_CLKPIN_CNTL_2, data); - orig = data = RREG32(MPLL_BYPASSCLK_SEL); - data &= ~MPLL_CLKOUT_SEL_MASK; - data |= MPLL_CLKOUT_SEL(4); + orig = data = RREG32(mmMPLL_BYPASSCLK_SEL); + data &= ~MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK; + data |= 4 << MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT; if (orig != data) - WREG32(MPLL_BYPASSCLK_SEL, data); + WREG32(mmMPLL_BYPASSCLK_SEL, data); - orig = data = RREG32(SPLL_CNTL_MODE); - data &= ~SPLL_REFCLK_SEL_MASK; + orig = data = RREG32(mmSPLL_CNTL_MODE); + data &= ~SPLL_CNTL_MODE__SPLL_REFCLK_SEL_MASK; if (orig != data) - WREG32(SPLL_CNTL_MODE, data); + WREG32(mmSPLL_CNTL_MODE, data); } } } else { if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data); } - orig = data = RREG32_PCIE(PCIE_CNTL2); - data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN; + orig = data = RREG32_PCIE(ixPCIE_CNTL2); + data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK | PCIE_CNTL2__MST_MEM_LS_EN_MASK | PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK; if (orig != data) - WREG32_PCIE(PCIE_CNTL2, data); + WREG32_PCIE(ixPCIE_CNTL2, data); if (!disable_l0s) { - data = 
RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL); - if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) { - data = RREG32_PCIE(PCIE_LC_STATUS1); - if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) { - orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL); - data &= ~LC_L0S_INACTIVITY_MASK; + data = RREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL); + if((data & PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) == PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) { + data = RREG32_PCIE(ixPCIE_LC_STATUS1); + if ((data & PCIE_LC_STATUS1__LC_REVERSE_XMIT_MASK) && (data & PCIE_LC_STATUS1__LC_REVERSE_RCVR_MASK)) { + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL); + data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data); } } } diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index e2089c8da71b..7f18e4875287 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -27,6 +27,8 @@ #include "si.h" #include "sid.h" +#include "oss/oss_1_0_d.h" +#include "oss/oss_1_0_sh_mask.h" const u32 sdma_offsets[SDMA_MAX_INSTANCE] = { DMA0_REGISTER_OFFSET, @@ -38,17 +40,31 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev); static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev); static void si_dma_set_irq_funcs(struct amdgpu_device *adev); +/** + * si_dma_ring_get_rptr - get the current read pointer + * + * @ring: amdgpu ring pointer + * + * Get the current rptr from the hardware (SI). + */ static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring) { return *ring->rptr_cpu_addr; } +/** + * si_dma_ring_get_wptr - get the current write pointer + * + * @ring: amdgpu ring pointer + * + * Get the current wptr from the hardware (SI). + */ static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - return (RREG32(DMA_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2; + return (RREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2; } static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) @@ -56,7 +72,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 
0 : 1; - WREG32(DMA_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); + WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); } static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, @@ -117,9 +133,9 @@ static void si_dma_stop(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { /* dma0 */ - rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]); - rb_cntl &= ~DMA_RB_ENABLE; - WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl); + rb_cntl = RREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i]); + rb_cntl &= ~DMA_GFX_RB_CNTL__RB_ENABLE_MASK; + WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); } } @@ -133,44 +149,44 @@ static int si_dma_start(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; - WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0); - WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); + WREG32(mmDMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0); + WREG32(mmDMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); /* Set ring buffer size in dwords */ rb_bufsz = order_base_2(ring->ring_size / 4); rb_cntl = rb_bufsz << 1; #ifdef __BIG_ENDIAN - rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; + rb_cntl |= DMA_GFX_RB_CNTL__RB_SWAP_ENABLE_MASK | DMA_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE_MASK; #endif - WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl); + WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); /* Initialize the ring buffer's read and write pointers */ - WREG32(DMA_RB_RPTR + sdma_offsets[i], 0); - WREG32(DMA_RB_WPTR + sdma_offsets[i], 0); + WREG32(mmDMA_GFX_RB_RPTR + sdma_offsets[i], 0); + WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], 0); rptr_addr = ring->rptr_gpu_addr; - WREG32(DMA_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr)); - WREG32(DMA_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF); + WREG32(mmDMA_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr)); + WREG32(mmDMA_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF); - rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; + rb_cntl |= DMA_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK; - WREG32(DMA_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); + WREG32(mmDMA_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); /* enable DMA IBs */ - ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE; + ib_cntl = DMA_GFX_IB_CNTL__IB_ENABLE_MASK | DMA_GFX_IB_CNTL__CMD_VMID_FORCE_MASK; #ifdef __BIG_ENDIAN - ib_cntl |= DMA_IB_SWAP_ENABLE; + ib_cntl |= DMA_GFX_IB_CNTL__IB_SWAP_ENABLE_MASK; #endif - WREG32(DMA_IB_CNTL + sdma_offsets[i], ib_cntl); + WREG32(mmDMA_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); - dma_cntl = RREG32(DMA_CNTL + sdma_offsets[i]); - dma_cntl &= ~CTXEMPTY_INT_ENABLE; - WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl); + dma_cntl = RREG32(mmDMA_CNTL + sdma_offsets[i]); + dma_cntl &= ~DMA_CNTL__CTXEMPTY_INT_ENABLE_MASK; + WREG32(mmDMA_CNTL + sdma_offsets[i], dma_cntl); ring->wptr = 0; - WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2); - WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE); + WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); + WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_GFX_RB_CNTL__RB_ENABLE_MASK); r = amdgpu_ring_test_helper(ring); if (r) @@ -461,7 +477,7 @@ static int si_dma_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - adev->sdma.num_instances = 2; + adev->sdma.num_instances = SDMA_MAX_INSTANCE; si_dma_set_ring_funcs(adev); si_dma_set_buffer_funcs(adev); @@ 
-545,9 +561,9 @@ static bool si_dma_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - u32 tmp = RREG32(SRBM_STATUS2); + u32 tmp = RREG32(mmSRBM_STATUS2); - if (tmp & (DMA_BUSY_MASK | DMA1_BUSY_MASK)) + if (tmp & (SRBM_STATUS2__DMA_BUSY_MASK | SRBM_STATUS2__DMA1_BUSY_MASK)) return false; return true; @@ -583,14 +599,14 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev, case AMDGPU_SDMA_IRQ_INSTANCE0: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET); - sdma_cntl &= ~TRAP_ENABLE; - WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET); + sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK; + WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET); - sdma_cntl |= TRAP_ENABLE; - WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET); + sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK; + WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); break; default: break; @@ -599,14 +615,14 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev, case AMDGPU_SDMA_IRQ_INSTANCE1: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET); - sdma_cntl &= ~TRAP_ENABLE; - WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET); + sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK; + WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET); - sdma_cntl |= TRAP_ENABLE; - WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET); + sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK; + WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); break; default: break; @@ -645,11 +661,11 @@ static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block, offset = DMA0_REGISTER_OFFSET; else offset = DMA1_REGISTER_OFFSET; - orig = data = RREG32(DMA_POWER_CNTL + offset); - data &= ~MEM_POWER_OVERRIDE; + orig = data = RREG32(mmDMA_POWER_CNTL + offset); + data &= ~DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (data != orig) - WREG32(DMA_POWER_CNTL + offset, data); - WREG32(DMA_CLK_CTRL + offset, 0x00000100); + WREG32(mmDMA_POWER_CNTL + offset, data); + WREG32(mmDMA_CLK_CTRL + offset, 0x00000100); } } else { for (i = 0; i < adev->sdma.num_instances; i++) { @@ -657,15 +673,15 @@ static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block, offset = DMA0_REGISTER_OFFSET; else offset = DMA1_REGISTER_OFFSET; - orig = data = RREG32(DMA_POWER_CNTL + offset); - data |= MEM_POWER_OVERRIDE; + orig = data = RREG32(mmDMA_POWER_CNTL + offset); + data |= DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (data != orig) - WREG32(DMA_POWER_CNTL + offset, data); + WREG32(mmDMA_POWER_CNTL + offset, data); - orig = data = RREG32(DMA_CLK_CTRL + offset); + orig = data = RREG32(mmDMA_CLK_CTRL + offset); data = 0xff000000; if (data != orig) - WREG32(DMA_CLK_CTRL + offset, data); + WREG32(mmDMA_CLK_CTRL + offset, data); } } @@ -679,11 +695,11 @@ static int si_dma_set_powergating_state(struct amdgpu_ip_block *ip_block, struct amdgpu_device *adev = ip_block->adev; - WREG32(DMA_PGFSM_WRITE, 0x00002000); - WREG32(DMA_PGFSM_CONFIG, 0x100010ff); + WREG32(mmDMA_PGFSM_WRITE, 0x00002000); + WREG32(mmDMA_PGFSM_CONFIG, 0x100010ff); for (tmp = 0; 
tmp < 5; tmp++) - WREG32(DMA_PGFSM_WRITE, 0); + WREG32(mmDMA_PGFSM_WRITE, 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h index d656ef1fa6e1..6da65778292b 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_enums.h +++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h @@ -23,115 +23,15 @@ #ifndef SI_ENUMS_H #define SI_ENUMS_H -#define VBLANK_INT_MASK (1 << 0) -#define DC_HPDx_INT_EN (1 << 16) -#define VBLANK_ACK (1 << 4) -#define VLINE_ACK (1 << 4) - -#define CURSOR_WIDTH 64 -#define CURSOR_HEIGHT 64 - -#define VGA_VSTATUS_CNTL 0xFFFCFFFF #define PRIORITY_MARK_MASK 0x7fff #define PRIORITY_OFF (1 << 16) #define PRIORITY_ALWAYS_ON (1 << 20) -#define INTERLEAVE_EN (1 << 0) - -#define LATENCY_WATERMARK_MASK(x) ((x) << 16) -#define DC_LB_MEMORY_CONFIG(x) ((x) << 20) -#define ICON_DEGAMMA_MODE(x) (((x) & 0x3) << 8) - -#define GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0) -#define GRPH_ENDIAN_NONE 0 -#define GRPH_ENDIAN_8IN16 1 -#define GRPH_ENDIAN_8IN32 2 -#define GRPH_ENDIAN_8IN64 3 -#define GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4) -#define GRPH_RED_SEL_R 0 -#define GRPH_RED_SEL_G 1 -#define GRPH_RED_SEL_B 2 -#define GRPH_RED_SEL_A 3 -#define GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6) -#define GRPH_GREEN_SEL_G 0 -#define GRPH_GREEN_SEL_B 1 -#define GRPH_GREEN_SEL_A 2 -#define GRPH_GREEN_SEL_R 3 -#define GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8) -#define GRPH_BLUE_SEL_B 0 -#define GRPH_BLUE_SEL_A 1 -#define GRPH_BLUE_SEL_R 2 -#define GRPH_BLUE_SEL_G 3 -#define GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10) -#define GRPH_ALPHA_SEL_A 0 -#define GRPH_ALPHA_SEL_R 1 -#define GRPH_ALPHA_SEL_G 2 -#define GRPH_ALPHA_SEL_B 3 - -#define GRPH_DEPTH(x) (((x) & 0x3) << 0) -#define GRPH_DEPTH_8BPP 0 -#define GRPH_DEPTH_16BPP 1 -#define GRPH_DEPTH_32BPP 2 - -#define GRPH_FORMAT(x) (((x) & 0x7) << 8) -#define GRPH_FORMAT_INDEXED 0 -#define GRPH_FORMAT_ARGB1555 0 -#define GRPH_FORMAT_ARGB565 1 -#define GRPH_FORMAT_ARGB4444 2 -#define GRPH_FORMAT_AI88 3 -#define GRPH_FORMAT_MONO16 4 -#define GRPH_FORMAT_BGRA5551 5 -#define GRPH_FORMAT_ARGB8888 0 -#define GRPH_FORMAT_ARGB2101010 1 -#define GRPH_FORMAT_32BPP_DIG 2 -#define GRPH_FORMAT_8B_ARGB2101010 3 -#define GRPH_FORMAT_BGRA1010102 4 -#define GRPH_FORMAT_8B_BGRA1010102 5 -#define GRPH_FORMAT_RGB111110 6 -#define GRPH_FORMAT_BGR101111 7 - -#define GRPH_NUM_BANKS(x) (((x) & 0x3) << 2) -#define GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20) -#define GRPH_ARRAY_LINEAR_GENERAL 0 -#define GRPH_ARRAY_LINEAR_ALIGNED 1 -#define GRPH_ARRAY_1D_TILED_THIN1 2 -#define GRPH_ARRAY_2D_TILED_THIN1 4 -#define GRPH_TILE_SPLIT(x) (((x) & 0x7) << 13) -#define GRPH_BANK_WIDTH(x) (((x) & 0x3) << 6) -#define GRPH_BANK_HEIGHT(x) (((x) & 0x3) << 11) -#define GRPH_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 18) -#define GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20) -#define GRPH_PIPE_CONFIG(x) (((x) & 0x1f) << 24) - -#define CURSOR_EN (1 << 0) -#define CURSOR_MODE(x) (((x) & 0x3) << 8) -#define CURSOR_MONO 0 -#define CURSOR_24_1 1 -#define CURSOR_24_8_PRE_MULT 2 -#define CURSOR_24_8_UNPRE_MULT 3 -#define CURSOR_2X_MAGNIFY (1 << 16) -#define CURSOR_FORCE_MC_ON (1 << 20) -#define CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24) -#define CURSOR_URGENT_ALWAYS 0 -#define CURSOR_URGENT_1_8 1 -#define CURSOR_URGENT_1_4 2 -#define CURSOR_URGENT_3_8 3 -#define CURSOR_URGENT_1_2 4 -#define CURSOR_UPDATE_PENDING (1 << 0) -#define CURSOR_UPDATE_TAKEN (1 << 1) -#define CURSOR_UPDATE_LOCK (1 << 16) -#define CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24) - - -#define ES_AND_GS_AUTO 3 -#define 
RADEON_PACKET_TYPE3 3 -#define CE_PARTITION_BASE 3 -#define BUF_SWAP_32BIT (2 << 16) #define GFX_POWER_STATUS (1 << 1) #define GFX_CLOCK_STATUS (1 << 2) #define GFX_LS_STATUS (1 << 3) -#define RLC_BUSY_STATUS (1 << 0) +#define RLC_BUSY_STATUS (1 << 0) #define RLC_PUD(x) ((x) << 0) #define RLC_PUD_MASK (0xff << 0) #define RLC_PDD(x) ((x) << 8) @@ -140,140 +40,8 @@ #define RLC_TTPD_MASK (0xff << 16) #define RLC_MSD(x) ((x) << 24) #define RLC_MSD_MASK (0xff << 24) -#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) -#define WRITE_DATA_DST_SEL(x) ((x) << 8) -#define EVENT_TYPE(x) ((x) << 0) -#define EVENT_INDEX(x) ((x) << 8) -#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4) -#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0) -#define WAIT_REG_MEM_ENGINE(x) ((x) << 8) -#define GFX6_NUM_GFX_RINGS 1 -#define GFX6_NUM_COMPUTE_RINGS 2 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D -#define PACKET3(op, n) ((RADEON_PACKET_TYPE3 << 30) | \ - (((op) & 0xFF) << 8) | \ - ((n) & 0x3FFF) << 16) -#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1) -#define PACKET3_NOP 0x10 -#define PACKET3_SET_BASE 0x11 -#define PACKET3_BASE_INDEX(x) ((x) << 0) -#define PACKET3_CLEAR_STATE 0x12 -#define PACKET3_INDEX_BUFFER_SIZE 0x13 -#define PACKET3_DISPATCH_DIRECT 0x15 -#define PACKET3_DISPATCH_INDIRECT 0x16 -#define PACKET3_ALLOC_GDS 0x1B -#define PACKET3_WRITE_GDS_RAM 0x1C -#define PACKET3_ATOMIC_GDS 0x1D -#define PACKET3_ATOMIC 0x1E -#define PACKET3_OCCLUSION_QUERY 0x1F -#define PACKET3_SET_PREDICATION 0x20 -#define PACKET3_REG_RMW 0x21 -#define PACKET3_COND_EXEC 0x22 -#define PACKET3_PRED_EXEC 0x23 -#define PACKET3_DRAW_INDIRECT 0x24 -#define PACKET3_DRAW_INDEX_INDIRECT 0x25 -#define PACKET3_INDEX_BASE 0x26 -#define PACKET3_DRAW_INDEX_2 0x27 -#define PACKET3_CONTEXT_CONTROL 0x28 -#define PACKET3_INDEX_TYPE 0x2A -#define PACKET3_DRAW_INDIRECT_MULTI 0x2C -#define PACKET3_DRAW_INDEX_AUTO 0x2D -#define PACKET3_DRAW_INDEX_IMMD 0x2E -#define PACKET3_NUM_INSTANCES 0x2F -#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30 -#define PACKET3_INDIRECT_BUFFER_CONST 0x31 -#define PACKET3_INDIRECT_BUFFER 0x3F -#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34 -#define PACKET3_DRAW_INDEX_OFFSET_2 0x35 -#define PACKET3_DRAW_INDEX_MULTI_ELEMENT 0x36 -#define PACKET3_WRITE_DATA 0x37 -#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38 -#define PACKET3_MEM_SEMAPHORE 0x39 -#define PACKET3_MPEG_INDEX 0x3A -#define PACKET3_COPY_DW 0x3B -#define PACKET3_WAIT_REG_MEM 0x3C -#define PACKET3_MEM_WRITE 0x3D -#define PACKET3_COPY_DATA 0x40 -#define PACKET3_CP_DMA 0x41 -# define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20) -# define PACKET3_CP_DMA_ENGINE(x) ((x) << 27) -# define PACKET3_CP_DMA_SRC_SEL(x) ((x) << 29) -# define PACKET3_CP_DMA_CP_SYNC (1 << 31) -# define PACKET3_CP_DMA_DIS_WC (1 << 21) -# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22) -# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24) -# define PACKET3_CP_DMA_CMD_SAS (1 << 26) -# define PACKET3_CP_DMA_CMD_DAS (1 << 27) -# define PACKET3_CP_DMA_CMD_SAIC (1 << 28) -# define PACKET3_CP_DMA_CMD_DAIC (1 << 29) -# define PACKET3_CP_DMA_CMD_RAW_WAIT (1 << 30) -#define PACKET3_PFP_SYNC_ME 0x42 -#define PACKET3_SURFACE_SYNC 0x43 -# define PACKET3_DEST_BASE_0_ENA (1 << 0) -# define PACKET3_DEST_BASE_1_ENA (1 << 1) -# define PACKET3_CB0_DEST_BASE_ENA (1 << 6) -# define PACKET3_CB1_DEST_BASE_ENA (1 << 7) -# define PACKET3_CB2_DEST_BASE_ENA (1 << 8) -# define PACKET3_CB3_DEST_BASE_ENA (1 << 9) -# define PACKET3_CB4_DEST_BASE_ENA (1 << 10) -# define PACKET3_CB5_DEST_BASE_ENA (1 << 
11) -# define PACKET3_CB6_DEST_BASE_ENA (1 << 12) -# define PACKET3_CB7_DEST_BASE_ENA (1 << 13) -# define PACKET3_DB_DEST_BASE_ENA (1 << 14) -# define PACKET3_DEST_BASE_2_ENA (1 << 19) -# define PACKET3_DEST_BASE_3_ENA (1 << 21) -# define PACKET3_TCL1_ACTION_ENA (1 << 22) -# define PACKET3_TC_ACTION_ENA (1 << 23) -# define PACKET3_CB_ACTION_ENA (1 << 25) -# define PACKET3_DB_ACTION_ENA (1 << 26) -# define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27) -# define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29) -#define PACKET3_ME_INITIALIZE 0x44 -#define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16) -#define PACKET3_COND_WRITE 0x45 -#define PACKET3_EVENT_WRITE 0x46 -#define PACKET3_EVENT_WRITE_EOP 0x47 -#define PACKET3_EVENT_WRITE_EOS 0x48 -#define PACKET3_PREAMBLE_CNTL 0x4A -# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28) -# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28) -#define PACKET3_ONE_REG_WRITE 0x57 -#define PACKET3_LOAD_CONFIG_REG 0x5F -#define PACKET3_LOAD_CONTEXT_REG 0x60 -#define PACKET3_LOAD_SH_REG 0x61 -#define PACKET3_SET_CONFIG_REG 0x68 -#define PACKET3_SET_CONFIG_REG_START 0x00002000 -#define PACKET3_SET_CONFIG_REG_END 0x00002c00 -#define PACKET3_SET_CONTEXT_REG 0x69 -#define PACKET3_SET_CONTEXT_REG_START 0x000a000 -#define PACKET3_SET_CONTEXT_REG_END 0x000a400 -#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73 -#define PACKET3_SET_RESOURCE_INDIRECT 0x74 -#define PACKET3_SET_SH_REG 0x76 -#define PACKET3_SET_SH_REG_START 0x00002c00 -#define PACKET3_SET_SH_REG_END 0x00003000 -#define PACKET3_SET_SH_REG_OFFSET 0x77 -#define PACKET3_ME_WRITE 0x7A -#define PACKET3_SCRATCH_RAM_WRITE 0x7D -#define PACKET3_SCRATCH_RAM_READ 0x7E -#define PACKET3_CE_WRITE 0x7F -#define PACKET3_LOAD_CONST_RAM 0x80 -#define PACKET3_WRITE_CONST_RAM 0x81 -#define PACKET3_WRITE_CONST_RAM_OFFSET 0x82 -#define PACKET3_DUMP_CONST_RAM 0x83 -#define PACKET3_INCREMENT_CE_COUNTER 0x84 -#define PACKET3_INCREMENT_DE_COUNTER 0x85 -#define PACKET3_WAIT_ON_CE_COUNTER 0x86 -#define PACKET3_WAIT_ON_DE_COUNTER 0x87 -#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 -#define PACKET3_SET_CE_DE_COUNTERS 0x89 -#define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A -#define PACKET3_SWITCH_BUFFER 0x8B -#define PACKET3_SEM_WAIT_ON_SIGNAL (0x1 << 12) -#define PACKET3_SEM_SEL_SIGNAL (0x6 << 29) -#define PACKET3_SEM_SEL_WAIT (0x7 << 29) - #endif diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 5c38e1fb1dca..1df00f8a2406 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -27,6 +27,7 @@ #include "amdgpu_ih.h" #include "sid.h" #include "si_ih.h" + #include "oss/oss_1_0_d.h" #include "oss/oss_1_0_sh_mask.h" @@ -213,7 +214,7 @@ static int si_ih_resume(struct amdgpu_ip_block *ip_block) static bool si_ih_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - u32 tmp = RREG32(SRBM_STATUS); + u32 tmp = RREG32(mmSRBM_STATUS); if (tmp & SRBM_STATUS__IH_BUSY_MASK) return false; @@ -239,23 +240,23 @@ static int si_ih_soft_reset(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset = 0; - u32 tmp = RREG32(SRBM_STATUS); + u32 tmp = RREG32(mmSRBM_STATUS); if (tmp & SRBM_STATUS__IH_BUSY_MASK) srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_IH_MASK; if (srbm_soft_reset) { - tmp = RREG32(SRBM_SOFT_RESET); + tmp = RREG32(mmSRBM_SOFT_RESET); tmp |= srbm_soft_reset; - dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32(SRBM_SOFT_RESET, tmp); - tmp = RREG32(SRBM_SOFT_RESET); + dev_info(adev->dev, 
"mmSRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); udelay(50); tmp &= ~srbm_soft_reset; - WREG32(SRBM_SOFT_RESET, tmp); - tmp = RREG32(SRBM_SOFT_RESET); + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); udelay(50); } diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index cbf232f5235b..cbd4f8951cfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -24,43 +24,12 @@ #ifndef SI_H #define SI_H -#define TAHITI_RB_BITMAP_WIDTH_PER_SH 2 - -#define SI_MAX_SH_GPRS 256 -#define SI_MAX_TEMP_GPRS 16 -#define SI_MAX_SH_THREADS 256 -#define SI_MAX_SH_STACK_ENTRIES 4096 -#define SI_MAX_FRC_EOV_CNT 16384 -#define SI_MAX_BACKENDS 8 -#define SI_MAX_BACKENDS_MASK 0xFF -#define SI_MAX_BACKENDS_PER_SE_MASK 0x0F -#define SI_MAX_SIMDS 12 -#define SI_MAX_SIMDS_MASK 0x0FFF -#define SI_MAX_SIMDS_PER_SE_MASK 0x00FF -#define SI_MAX_PIPES 8 -#define SI_MAX_PIPES_MASK 0xFF -#define SI_MAX_PIPES_PER_SIMD_MASK 0x3F -#define SI_MAX_LDS_NUM 0xFFFF -#define SI_MAX_TCC 16 -#define SI_MAX_TCC_MASK 0xFFFF #define SI_MAX_CTLACKS_ASSERTION_WAIT 100 -/* SMC IND accessor regs */ -#define SMC_IND_INDEX_0 0x80 -#define SMC_IND_DATA_0 0x81 - -#define SMC_IND_ACCESS_CNTL 0x8A -# define AUTO_INCREMENT_IND_0 (1 << 0) -#define SMC_MESSAGE_0 0x8B -#define SMC_RESP_0 0x8C - /* CG IND registers are accessed via SMC indirect space + SMC_CG_IND_START */ #define SMC_CG_IND_START 0xc0030000 #define SMC_CG_IND_END 0xc0040000 -#define CG_CGTT_LOCAL_0 0x400 -#define CG_CGTT_LOCAL_1 0x401 - /* SMC IND registers */ #define SMC_SYSCON_RESET_CNTL 0x80000000 # define RST_REG (1 << 0) @@ -68,9 +37,6 @@ # define CK_DISABLE (1 << 0) # define CKEN (1 << 24) -#define VGA_HDP_CONTROL 0xCA -#define VGA_MEMORY_DISABLE (1 << 4) - #define DCCG_DISP_SLOW_SELECT_REG 0x13F #define DCCG_DISP1_SLOW_SELECT(x) ((x) << 0) #define DCCG_DISP1_SLOW_SELECT_MASK (7 << 0) @@ -79,47 +45,6 @@ #define DCCG_DISP2_SLOW_SELECT_MASK (7 << 4) #define DCCG_DISP2_SLOW_SELECT_SHIFT 4 -#define CG_SPLL_FUNC_CNTL 0x180 -#define SPLL_RESET (1 << 0) -#define SPLL_SLEEP (1 << 1) -#define SPLL_BYPASS_EN (1 << 3) -#define SPLL_REF_DIV(x) ((x) << 4) -#define SPLL_REF_DIV_MASK (0x3f << 4) -#define SPLL_PDIV_A(x) ((x) << 20) -#define SPLL_PDIV_A_MASK (0x7f << 20) -#define SPLL_PDIV_A_SHIFT 20 -#define CG_SPLL_FUNC_CNTL_2 0x181 -#define SCLK_MUX_SEL(x) ((x) << 0) -#define SCLK_MUX_SEL_MASK (0x1ff << 0) -#define SPLL_CTLREQ_CHG (1 << 23) -#define SCLK_MUX_UPDATE (1 << 26) -#define CG_SPLL_FUNC_CNTL_3 0x182 -#define SPLL_FB_DIV(x) ((x) << 0) -#define SPLL_FB_DIV_MASK (0x3ffffff << 0) -#define SPLL_FB_DIV_SHIFT 0 -#define SPLL_DITHEN (1 << 28) -#define CG_SPLL_FUNC_CNTL_4 0x183 - -#define SPLL_STATUS 0x185 -#define SPLL_CHG_STATUS (1 << 1) -#define SPLL_CNTL_MODE 0x186 -#define SPLL_SW_DIR_CONTROL (1 << 0) -# define SPLL_REFCLK_SEL(x) ((x) << 26) -# define SPLL_REFCLK_SEL_MASK (3 << 26) - -#define CG_SPLL_SPREAD_SPECTRUM 0x188 -#define SSEN (1 << 0) -#define CLK_S(x) ((x) << 4) -#define CLK_S_MASK (0xfff << 4) -#define CLK_S_SHIFT 4 -#define CG_SPLL_SPREAD_SPECTRUM_2 0x189 -#define CLK_V(x) ((x) << 0) -#define CLK_V_MASK (0x3ffffff << 0) -#define CLK_V_SHIFT 0 - -#define CG_SPLL_AUTOSCALE_CNTL 0x18b -# define AUTOSCALE_ON_SS_CLEAR (1 << 9) - /* discrete uvd clocks */ #define CG_UPLL_FUNC_CNTL 0x18d # define UPLL_RESET_MASK 0x00000001 @@ -149,317 +74,13 @@ #define CG_UPLL_SPREAD_SPECTRUM 0x194 # define SSEN_MASK 0x00000001 -#define MPLL_BYPASSCLK_SEL 0x197 -# 
define MPLL_CLKOUT_SEL(x) ((x) << 8) -# define MPLL_CLKOUT_SEL_MASK 0xFF00 - -#define CG_CLKPIN_CNTL 0x198 -# define XTALIN_DIVIDE (1 << 1) -# define BCLK_AS_XCLK (1 << 2) -#define CG_CLKPIN_CNTL_2 0x199 -# define FORCE_BIF_REFCLK_EN (1 << 3) -# define MUX_TCLK_TO_XCLK (1 << 8) - -#define THM_CLK_CNTL 0x19b -# define CMON_CLK_SEL(x) ((x) << 0) -# define CMON_CLK_SEL_MASK 0xFF -# define TMON_CLK_SEL(x) ((x) << 8) -# define TMON_CLK_SEL_MASK 0xFF00 -#define MISC_CLK_CNTL 0x19c -# define DEEP_SLEEP_CLK_SEL(x) ((x) << 0) -# define DEEP_SLEEP_CLK_SEL_MASK 0xFF -# define ZCLK_SEL(x) ((x) << 8) -# define ZCLK_SEL_MASK 0xFF00 - -#define CG_THERMAL_CTRL 0x1c0 -#define DPM_EVENT_SRC(x) ((x) << 0) -#define DPM_EVENT_SRC_MASK (7 << 0) -#define DIG_THERM_DPM(x) ((x) << 14) -#define DIG_THERM_DPM_MASK 0x003FC000 -#define DIG_THERM_DPM_SHIFT 14 -#define CG_THERMAL_STATUS 0x1c1 -#define FDO_PWM_DUTY(x) ((x) << 9) -#define FDO_PWM_DUTY_MASK (0xff << 9) -#define FDO_PWM_DUTY_SHIFT 9 -#define CG_THERMAL_INT 0x1c2 -#define DIG_THERM_INTH(x) ((x) << 8) -#define DIG_THERM_INTH_MASK 0x0000FF00 -#define DIG_THERM_INTH_SHIFT 8 -#define DIG_THERM_INTL(x) ((x) << 16) -#define DIG_THERM_INTL_MASK 0x00FF0000 -#define DIG_THERM_INTL_SHIFT 16 -#define THERM_INT_MASK_HIGH (1 << 24) -#define THERM_INT_MASK_LOW (1 << 25) - -#define CG_MULT_THERMAL_CTRL 0x1c4 -#define TEMP_SEL(x) ((x) << 20) -#define TEMP_SEL_MASK (0xff << 20) -#define TEMP_SEL_SHIFT 20 -#define CG_MULT_THERMAL_STATUS 0x1c5 -#define ASIC_MAX_TEMP(x) ((x) << 0) -#define ASIC_MAX_TEMP_MASK 0x000001ff -#define ASIC_MAX_TEMP_SHIFT 0 -#define CTF_TEMP(x) ((x) << 9) -#define CTF_TEMP_MASK 0x0003fe00 -#define CTF_TEMP_SHIFT 9 - -#define CG_FDO_CTRL0 0x1d5 -#define FDO_STATIC_DUTY(x) ((x) << 0) -#define FDO_STATIC_DUTY_MASK 0x000000FF -#define FDO_STATIC_DUTY_SHIFT 0 -#define CG_FDO_CTRL1 0x1d6 -#define FMAX_DUTY100(x) ((x) << 0) -#define FMAX_DUTY100_MASK 0x000000FF -#define FMAX_DUTY100_SHIFT 0 -#define CG_FDO_CTRL2 0x1d7 -#define TMIN(x) ((x) << 0) -#define TMIN_MASK 0x000000FF -#define TMIN_SHIFT 0 -#define FDO_PWM_MODE(x) ((x) << 11) -#define FDO_PWM_MODE_MASK (7 << 11) -#define FDO_PWM_MODE_SHIFT 11 -#define TACH_PWM_RESP_RATE(x) ((x) << 25) -#define TACH_PWM_RESP_RATE_MASK (0x7f << 25) -#define TACH_PWM_RESP_RATE_SHIFT 25 - -#define CG_TACH_CTRL 0x1dc -# define EDGE_PER_REV(x) ((x) << 0) -# define EDGE_PER_REV_MASK (0x7 << 0) -# define EDGE_PER_REV_SHIFT 0 -# define TARGET_PERIOD(x) ((x) << 3) -# define TARGET_PERIOD_MASK 0xfffffff8 -# define TARGET_PERIOD_SHIFT 3 -#define CG_TACH_STATUS 0x1dd -# define TACH_PERIOD(x) ((x) << 0) -# define TACH_PERIOD_MASK 0xffffffff -# define TACH_PERIOD_SHIFT 0 - -#define GENERAL_PWRMGT 0x1e0 -# define GLOBAL_PWRMGT_EN (1 << 0) -# define STATIC_PM_EN (1 << 1) -# define THERMAL_PROTECTION_DIS (1 << 2) -# define THERMAL_PROTECTION_TYPE (1 << 3) -# define SW_SMIO_INDEX(x) ((x) << 6) -# define SW_SMIO_INDEX_MASK (1 << 6) -# define SW_SMIO_INDEX_SHIFT 6 -# define VOLT_PWRMGT_EN (1 << 10) -# define DYN_SPREAD_SPECTRUM_EN (1 << 23) -#define CG_TPC 0x1e1 -#define SCLK_PWRMGT_CNTL 0x1e2 -# define SCLK_PWRMGT_OFF (1 << 0) -# define SCLK_LOW_D1 (1 << 1) -# define FIR_RESET (1 << 4) -# define FIR_FORCE_TREND_SEL (1 << 5) -# define FIR_TREND_MODE (1 << 6) -# define DYN_GFX_CLK_OFF_EN (1 << 7) -# define GFX_CLK_FORCE_ON (1 << 8) -# define GFX_CLK_REQUEST_OFF (1 << 9) -# define GFX_CLK_FORCE_OFF (1 << 10) -# define GFX_CLK_OFF_ACPI_D1 (1 << 11) -# define GFX_CLK_OFF_ACPI_D2 (1 << 12) -# define GFX_CLK_OFF_ACPI_D3 (1 << 13) -# define 
DYN_LIGHT_SLEEP_EN (1 << 14) - -#define TARGET_AND_CURRENT_PROFILE_INDEX 0x1e6 -# define CURRENT_STATE_INDEX_MASK (0xf << 4) -# define CURRENT_STATE_INDEX_SHIFT 4 - -#define CG_FTV 0x1ef - -#define CG_FFCT_0 0x1f0 -# define UTC_0(x) ((x) << 0) -# define UTC_0_MASK (0x3ff << 0) -# define DTC_0(x) ((x) << 10) -# define DTC_0_MASK (0x3ff << 10) - -#define CG_BSP 0x1ff -# define BSP(x) ((x) << 0) -# define BSP_MASK (0xffff << 0) -# define BSU(x) ((x) << 16) -# define BSU_MASK (0xf << 16) -#define CG_AT 0x200 -# define CG_R(x) ((x) << 0) -# define CG_R_MASK (0xffff << 0) -# define CG_L(x) ((x) << 16) -# define CG_L_MASK (0xffff << 16) - -#define CG_GIT 0x201 -# define CG_GICST(x) ((x) << 0) -# define CG_GICST_MASK (0xffff << 0) -# define CG_GIPOT(x) ((x) << 16) -# define CG_GIPOT_MASK (0xffff << 16) - -#define CG_SSP 0x203 -# define SST(x) ((x) << 0) -# define SST_MASK (0xffff << 0) -# define SSTU(x) ((x) << 16) -# define SSTU_MASK (0xf << 16) - -#define CG_DISPLAY_GAP_CNTL 0x20a -# define DISP1_GAP(x) ((x) << 0) -# define DISP1_GAP_MASK (3 << 0) -# define DISP2_GAP(x) ((x) << 2) -# define DISP2_GAP_MASK (3 << 2) -# define VBI_TIMER_COUNT(x) ((x) << 4) -# define VBI_TIMER_COUNT_MASK (0x3fff << 4) -# define VBI_TIMER_UNIT(x) ((x) << 20) -# define VBI_TIMER_UNIT_MASK (7 << 20) -# define DISP1_GAP_MCHG(x) ((x) << 24) -# define DISP1_GAP_MCHG_MASK (3 << 24) -# define DISP2_GAP_MCHG(x) ((x) << 26) -# define DISP2_GAP_MCHG_MASK (3 << 26) - -#define CG_ULV_CONTROL 0x21e -#define CG_ULV_PARAMETER 0x21f - -#define SMC_SCRATCH0 0x221 - -#define CG_CAC_CTRL 0x22e -# define CAC_WINDOW(x) ((x) << 0) -# define CAC_WINDOW_MASK 0x00ffffff - -#define DMIF_ADDR_CONFIG 0x2F5 - -#define DMIF_ADDR_CALC 0x300 - -#define PIPE0_DMIF_BUFFER_CONTROL 0x0328 -# define DMIF_BUFFERS_ALLOCATED(x) ((x) << 0) -# define DMIF_BUFFERS_ALLOCATED_COMPLETED (1 << 4) - -#define SRBM_STATUS 0x394 -#define GRBM_RQ_PENDING (1 << 5) -#define VMC_BUSY (1 << 8) -#define MCB_BUSY (1 << 9) -#define MCB_NON_DISPLAY_BUSY (1 << 10) -#define MCC_BUSY (1 << 11) -#define MCD_BUSY (1 << 12) -#define SEM_BUSY (1 << 14) -#define IH_BUSY (1 << 17) - -#define SRBM_SOFT_RESET 0x398 -#define SOFT_RESET_BIF (1 << 1) -#define SOFT_RESET_DC (1 << 5) -#define SOFT_RESET_DMA1 (1 << 6) -#define SOFT_RESET_GRBM (1 << 8) -#define SOFT_RESET_HDP (1 << 9) -#define SOFT_RESET_IH (1 << 10) -#define SOFT_RESET_MC (1 << 11) -#define SOFT_RESET_ROM (1 << 14) -#define SOFT_RESET_SEM (1 << 15) -#define SOFT_RESET_VMC (1 << 17) -#define SOFT_RESET_DMA (1 << 20) -#define SOFT_RESET_TST (1 << 21) -#define SOFT_RESET_REGBB (1 << 22) -#define SOFT_RESET_ORB (1 << 23) - -#define CC_SYS_RB_BACKEND_DISABLE 0x3A0 -#define GC_USER_SYS_RB_BACKEND_DISABLE 0x3A1 - -#define SRBM_READ_ERROR 0x3A6 -#define SRBM_INT_CNTL 0x3A8 -#define SRBM_INT_ACK 0x3AA - -#define SRBM_STATUS2 0x3B1 -#define DMA_BUSY (1 << 5) -#define DMA1_BUSY (1 << 6) - -#define VM_L2_CNTL 0x500 -#define ENABLE_L2_CACHE (1 << 0) -#define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1) -#define L2_CACHE_PTE_ENDIAN_SWAP_MODE(x) ((x) << 2) -#define L2_CACHE_PDE_ENDIAN_SWAP_MODE(x) ((x) << 4) -#define ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE (1 << 9) -#define ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE (1 << 10) -#define EFFECTIVE_L2_QUEUE_SIZE(x) (((x) & 7) << 15) -#define CONTEXT1_IDENTITY_ACCESS_MODE(x) (((x) & 3) << 19) -#define VM_L2_CNTL2 0x501 -#define INVALIDATE_ALL_L1_TLBS (1 << 0) -#define INVALIDATE_L2_CACHE (1 << 1) -#define INVALIDATE_CACHE_MODE(x) ((x) << 26) -#define INVALIDATE_PTE_AND_PDE_CACHES 0 -#define 
INVALIDATE_ONLY_PTE_CACHES 1 -#define INVALIDATE_ONLY_PDE_CACHES 2 -#define VM_L2_CNTL3 0x502 -#define BANK_SELECT(x) ((x) << 0) -#define L2_CACHE_UPDATE_MODE(x) ((x) << 6) -#define L2_CACHE_BIGK_FRAGMENT_SIZE(x) ((x) << 15) -#define L2_CACHE_BIGK_ASSOCIATIVITY (1 << 20) -#define VM_L2_STATUS 0x503 -#define L2_BUSY (1 << 0) -#define VM_CONTEXT0_CNTL 0x504 -#define ENABLE_CONTEXT (1 << 0) -#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1) -#define RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 3) -#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4) -#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 6) -#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 7) -#define PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 9) -#define PDE0_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 10) -#define VALID_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 12) -#define VALID_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 13) -#define READ_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 15) -#define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16) -#define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18) -#define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19) -#define PAGE_TABLE_BLOCK_SIZE(x) (((x) & 0xF) << 24) -#define VM_CONTEXT1_CNTL 0x505 -#define VM_CONTEXT0_CNTL2 0x50C -#define VM_CONTEXT1_CNTL2 0x50D -#define VM_CONTEXT8_PAGE_TABLE_BASE_ADDR 0x50E -#define VM_CONTEXT9_PAGE_TABLE_BASE_ADDR 0x50F -#define VM_CONTEXT10_PAGE_TABLE_BASE_ADDR 0x510 -#define VM_CONTEXT11_PAGE_TABLE_BASE_ADDR 0x511 -#define VM_CONTEXT12_PAGE_TABLE_BASE_ADDR 0x512 -#define VM_CONTEXT13_PAGE_TABLE_BASE_ADDR 0x513 -#define VM_CONTEXT14_PAGE_TABLE_BASE_ADDR 0x514 -#define VM_CONTEXT15_PAGE_TABLE_BASE_ADDR 0x515 - -#define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x53f -#define VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x537 -#define PROTECTIONS_MASK (0xf << 0) -#define PROTECTIONS_SHIFT 0 - /* bit 0: range - * bit 1: pde0 - * bit 2: valid - * bit 3: read - * bit 4: write - */ -#define MEMORY_CLIENT_ID_MASK (0xff << 12) -#define MEMORY_CLIENT_ID_SHIFT 12 -#define MEMORY_CLIENT_RW_MASK (1 << 24) -#define MEMORY_CLIENT_RW_SHIFT 24 -#define FAULT_VMID_MASK (0xf << 25) -#define FAULT_VMID_SHIFT 25 - #define VM_INVALIDATE_REQUEST 0x51E #define VM_INVALIDATE_RESPONSE 0x51F -#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x546 -#define VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR 0x547 - -#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x54F -#define VM_CONTEXT1_PAGE_TABLE_BASE_ADDR 0x550 -#define VM_CONTEXT2_PAGE_TABLE_BASE_ADDR 0x551 -#define VM_CONTEXT3_PAGE_TABLE_BASE_ADDR 0x552 -#define VM_CONTEXT4_PAGE_TABLE_BASE_ADDR 0x553 -#define VM_CONTEXT5_PAGE_TABLE_BASE_ADDR 0x554 -#define VM_CONTEXT6_PAGE_TABLE_BASE_ADDR 0x555 -#define VM_CONTEXT7_PAGE_TABLE_BASE_ADDR 0x556 -#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x557 -#define VM_CONTEXT1_PAGE_TABLE_START_ADDR 0x558 - -#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x55F -#define VM_CONTEXT1_PAGE_TABLE_END_ADDR 0x560 - #define VM_L2_CG 0x570 #define MC_CG_ENABLE (1 << 18) #define MC_LS_ENABLE (1 << 19) -#define MC_SHARED_CHMAP 0x801 -#define NOOFCHAN_SHIFT 12 -#define NOOFCHAN_MASK 0x0000f000 -#define MC_SHARED_CHREMAP 0x802 - #define MC_VM_FB_LOCATION 0x809 #define MC_VM_AGP_TOP 0x80A #define MC_VM_AGP_BOT 0x80B @@ -491,21 +112,6 @@ #define MC_CITF_MISC_WR_CG 0x993 #define MC_CITF_MISC_VM_CG 0x994 -#define MC_ARB_RAMCFG 0x9D8 -#define NOOFBANK_SHIFT 0 -#define NOOFBANK_MASK 0x00000003 -#define NOOFRANK_SHIFT 2 -#define NOOFRANK_MASK 0x00000004 -#define NOOFROWS_SHIFT 3 -#define NOOFROWS_MASK 0x00000038 -#define NOOFCOLS_SHIFT 6 
-#define NOOFCOLS_MASK 0x000000C0 -#define CHANSIZE_SHIFT 8 -#define CHANSIZE_MASK 0x00000100 -#define CHANSIZE_OVERRIDE (1 << 11) -#define NOOFGROUPS_SHIFT 12 -#define NOOFGROUPS_MASK 0x00001000 - #define MC_ARB_DRAM_TIMING 0x9DD #define MC_ARB_DRAM_TIMING2 0x9DE @@ -631,20 +237,6 @@ #define CLKS(x) ((x) << 0) #define CLKS_MASK (0xfff << 0) -#define HDP_HOST_PATH_CNTL 0xB00 -#define CLOCK_GATING_DIS (1 << 23) -#define HDP_NONSURFACE_BASE 0xB01 -#define HDP_NONSURFACE_INFO 0xB02 -#define HDP_NONSURFACE_SIZE 0xB03 - -#define HDP_DEBUG0 0xBCC - -#define HDP_ADDR_CONFIG 0xBD2 -#define HDP_MISC_CNTL 0xBD3 -#define HDP_FLUSH_INVALIDATE_CACHE (1 << 0) -#define HDP_MEM_POWER_LS 0xBD4 -#define HDP_LS_ENABLE (1 << 0) - #define ATC_MISC_CG 0xCD4 #define IH_RB_CNTL 0xF80 @@ -674,8 +266,6 @@ # define MC_WR_CLEAN_CNT(x) ((x) << 20) # define MC_VMID(x) ((x) << 25) -#define CONFIG_MEMSIZE 0x150A - #define INTERRUPT_CNTL 0x151A # define IH_DUMMY_RD_OVERRIDE (1 << 0) # define IH_DUMMY_RD_EN (1 << 1) @@ -683,486 +273,22 @@ # define GEN_IH_INT_EN (1 << 8) #define INTERRUPT_CNTL2 0x151B -#define HDP_MEM_COHERENCY_FLUSH_CNTL 0x1520 - -#define BIF_FB_EN 0x1524 -#define FB_READ_EN (1 << 0) -#define FB_WRITE_EN (1 << 1) - -#define HDP_REG_COHERENCY_FLUSH_CNTL 0x1528 - -/* DCE6 ELD audio interface */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0 0x28 /* LPCM */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1 0x29 /* AC3 */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2 0x2A /* MPEG1 */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3 0x2B /* MP3 */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4 0x2C /* MPEG2 */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5 0x2D /* AAC */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6 0x2E /* DTS */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7 0x2F /* ATRAC */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR8 0x30 /* one bit audio - leave at 0 (default) */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9 0x31 /* Dolby Digital */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10 0x32 /* DTS-HD */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11 0x33 /* MAT-MLP */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR12 0x34 /* DTS */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13 0x35 /* WMA Pro */ -# define MAX_CHANNELS(x) (((x) & 0x7) << 0) -/* max channels minus one. 
7 = 8 channels */ -# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8) -# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16) -# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */ -/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO - * bit0 = 32 kHz - * bit1 = 44.1 kHz - * bit2 = 48 kHz - * bit3 = 88.2 kHz - * bit4 = 96 kHz - * bit5 = 176.4 kHz - * bit6 = 192 kHz - */ - -#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC 0x37 -# define VIDEO_LIPSYNC(x) (((x) & 0xff) << 0) -# define AUDIO_LIPSYNC(x) (((x) & 0xff) << 8) -/* VIDEO_LIPSYNC, AUDIO_LIPSYNC - * 0 = invalid - * x = legal delay value - * 255 = sync not supported - */ -#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_HBR 0x38 -# define HBR_CAPABLE (1 << 0) /* enabled by default */ - -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO0 0x3a -# define MANUFACTURER_ID(x) (((x) & 0xffff) << 0) -# define PRODUCT_ID(x) (((x) & 0xffff) << 16) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO1 0x3b -# define SINK_DESCRIPTION_LEN(x) (((x) & 0xff) << 0) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO2 0x3c -# define PORT_ID0(x) (((x) & 0xffffffff) << 0) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO3 0x3d -# define PORT_ID1(x) (((x) & 0xffffffff) << 0) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO4 0x3e -# define DESCRIPTION0(x) (((x) & 0xff) << 0) -# define DESCRIPTION1(x) (((x) & 0xff) << 8) -# define DESCRIPTION2(x) (((x) & 0xff) << 16) -# define DESCRIPTION3(x) (((x) & 0xff) << 24) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO5 0x3f -# define DESCRIPTION4(x) (((x) & 0xff) << 0) -# define DESCRIPTION5(x) (((x) & 0xff) << 8) -# define DESCRIPTION6(x) (((x) & 0xff) << 16) -# define DESCRIPTION7(x) (((x) & 0xff) << 24) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO6 0x40 -# define DESCRIPTION8(x) (((x) & 0xff) << 0) -# define DESCRIPTION9(x) (((x) & 0xff) << 8) -# define DESCRIPTION10(x) (((x) & 0xff) << 16) -# define DESCRIPTION11(x) (((x) & 0xff) << 24) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO7 0x41 -# define DESCRIPTION12(x) (((x) & 0xff) << 0) -# define DESCRIPTION13(x) (((x) & 0xff) << 8) -# define DESCRIPTION14(x) (((x) & 0xff) << 16) -# define DESCRIPTION15(x) (((x) & 0xff) << 24) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO8 0x42 -# define DESCRIPTION16(x) (((x) & 0xff) << 0) -# define DESCRIPTION17(x) (((x) & 0xff) << 8) - -#define AZ_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL 0x54 -# define AUDIO_ENABLED (1 << 31) - -#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT 0x56 -#define PORT_CONNECTIVITY_MASK (3 << 30) -#define PORT_CONNECTIVITY_SHIFT 30 - -#define DC_LB_MEMORY_SPLIT 0x1AC3 -#define DC_LB_MEMORY_CONFIG(x) ((x) << 20) - -#define PRIORITY_A_CNT 0x1AC6 -#define PRIORITY_MARK_MASK 0x7fff -#define PRIORITY_OFF (1 << 16) -#define PRIORITY_ALWAYS_ON (1 << 20) -#define PRIORITY_B_CNT 0x1AC7 - -#define DPG_PIPE_ARBITRATION_CONTROL3 0x1B32 -# define LATENCY_WATERMARK_MASK(x) ((x) << 16) -#define DPG_PIPE_LATENCY_CONTROL 0x1B33 -# define LATENCY_LOW_WATERMARK(x) ((x) << 0) -# define LATENCY_HIGH_WATERMARK(x) ((x) << 16) - -/* 0x6bb8, 0x77b8, 0x103b8, 0x10fb8, 0x11bb8, 0x127b8 */ -#define VLINE_STATUS 0x1AEE -# define VLINE_OCCURRED (1 << 0) -# define VLINE_ACK (1 << 4) -# define VLINE_STAT (1 << 12) -# define VLINE_INTERRUPT (1 << 16) -# define VLINE_INTERRUPT_TYPE (1 << 17) -/* 0x6bbc, 0x77bc, 0x103bc, 0x10fbc, 0x11bbc, 0x127bc */ -#define VBLANK_STATUS 0x1AEF -# define VBLANK_OCCURRED (1 << 0) -# define VBLANK_ACK (1 << 4) -# define VBLANK_STAT (1 << 12) -# define VBLANK_INTERRUPT (1 << 16) -# define VBLANK_INTERRUPT_TYPE (1 << 17) - -/* 
0x6b40, 0x7740, 0x10340, 0x10f40, 0x11b40, 0x12740 */ -#define INT_MASK 0x1AD0 -# define VBLANK_INT_MASK (1 << 0) -# define VLINE_INT_MASK (1 << 4) - -#define DISP_INTERRUPT_STATUS 0x183D -# define LB_D1_VLINE_INTERRUPT (1 << 2) -# define LB_D1_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD1_INTERRUPT (1 << 17) -# define DC_HPD1_RX_INTERRUPT (1 << 18) -# define DACA_AUTODETECT_INTERRUPT (1 << 22) -# define DACB_AUTODETECT_INTERRUPT (1 << 23) -# define DC_I2C_SW_DONE_INTERRUPT (1 << 24) -# define DC_I2C_HW_DONE_INTERRUPT (1 << 25) -#define DISP_INTERRUPT_STATUS_CONTINUE 0x183E -# define LB_D2_VLINE_INTERRUPT (1 << 2) -# define LB_D2_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD2_INTERRUPT (1 << 17) -# define DC_HPD2_RX_INTERRUPT (1 << 18) -# define DISP_TIMER_INTERRUPT (1 << 24) -#define DISP_INTERRUPT_STATUS_CONTINUE2 0x183F -# define LB_D3_VLINE_INTERRUPT (1 << 2) -# define LB_D3_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD3_INTERRUPT (1 << 17) -# define DC_HPD3_RX_INTERRUPT (1 << 18) -#define DISP_INTERRUPT_STATUS_CONTINUE3 0x1840 -# define LB_D4_VLINE_INTERRUPT (1 << 2) -# define LB_D4_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD4_INTERRUPT (1 << 17) -# define DC_HPD4_RX_INTERRUPT (1 << 18) -#define DISP_INTERRUPT_STATUS_CONTINUE4 0x1853 -# define LB_D5_VLINE_INTERRUPT (1 << 2) -# define LB_D5_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD5_INTERRUPT (1 << 17) -# define DC_HPD5_RX_INTERRUPT (1 << 18) -#define DISP_INTERRUPT_STATUS_CONTINUE5 0x1854 -# define LB_D6_VLINE_INTERRUPT (1 << 2) -# define LB_D6_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD6_INTERRUPT (1 << 17) -# define DC_HPD6_RX_INTERRUPT (1 << 18) - -/* 0x6858, 0x7458, 0x10058, 0x10c58, 0x11858, 0x12458 */ -#define GRPH_INT_STATUS 0x1A16 -# define GRPH_PFLIP_INT_OCCURRED (1 << 0) -# define GRPH_PFLIP_INT_CLEAR (1 << 8) -/* 0x685c, 0x745c, 0x1005c, 0x10c5c, 0x1185c, 0x1245c */ -#define GRPH_INT_CONTROL 0x1A17 -# define GRPH_PFLIP_INT_MASK (1 << 0) -# define GRPH_PFLIP_INT_TYPE (1 << 8) - -#define DAC_AUTODETECT_INT_CONTROL 0x19F2 - -#define DC_HPD1_INT_STATUS 0x1807 -#define DC_HPD2_INT_STATUS 0x180A -#define DC_HPD3_INT_STATUS 0x180D -#define DC_HPD4_INT_STATUS 0x1810 -#define DC_HPD5_INT_STATUS 0x1813 -#define DC_HPD6_INT_STATUS 0x1816 -# define DC_HPDx_INT_STATUS (1 << 0) -# define DC_HPDx_SENSE (1 << 1) -# define DC_HPDx_RX_INT_STATUS (1 << 8) - -#define DC_HPD1_INT_CONTROL 0x1808 -#define DC_HPD2_INT_CONTROL 0x180B -#define DC_HPD3_INT_CONTROL 0x180E -#define DC_HPD4_INT_CONTROL 0x1811 -#define DC_HPD5_INT_CONTROL 0x1814 -#define DC_HPD6_INT_CONTROL 0x1817 -# define DC_HPDx_INT_ACK (1 << 0) -# define DC_HPDx_INT_POLARITY (1 << 8) -# define DC_HPDx_INT_EN (1 << 16) -# define DC_HPDx_RX_INT_ACK (1 << 20) -# define DC_HPDx_RX_INT_EN (1 << 24) - -#define DC_HPD1_CONTROL 0x1809 -#define DC_HPD2_CONTROL 0x180C -#define DC_HPD3_CONTROL 0x180F -#define DC_HPD4_CONTROL 0x1812 -#define DC_HPD5_CONTROL 0x1815 -#define DC_HPD6_CONTROL 0x1818 -# define DC_HPDx_CONNECTION_TIMER(x) ((x) << 0) -# define DC_HPDx_RX_INT_TIMER(x) ((x) << 16) -# define DC_HPDx_EN (1 << 28) - -#define DPG_PIPE_STUTTER_CONTROL 0x1B35 -# define STUTTER_ENABLE (1 << 0) - -/* 0x6e98, 0x7a98, 0x10698, 0x11298, 0x11e98, 0x12a98 */ -#define CRTC_STATUS_FRAME_COUNT 0x1BA6 - -/* Audio clocks */ -#define DCCG_AUDIO_DTO0_PHASE 0x05b0 -#define DCCG_AUDIO_DTO0_MODULE 0x05b4 -#define DCCG_AUDIO_DTO1_PHASE 0x05c0 -#define DCCG_AUDIO_DTO1_MODULE 0x05c4 - -#define GRBM_CNTL 0x2000 -#define GRBM_READ_TIMEOUT(x) ((x) << 0) - -#define GRBM_STATUS2 0x2002 -#define RLC_RQ_PENDING (1 << 0) -#define 
RLC_BUSY (1 << 8) -#define TC_BUSY (1 << 9) - -#define GRBM_STATUS 0x2004 -#define CMDFIFO_AVAIL_MASK 0x0000000F -#define RING2_RQ_PENDING (1 << 4) -#define SRBM_RQ_PENDING (1 << 5) -#define RING1_RQ_PENDING (1 << 6) -#define CF_RQ_PENDING (1 << 7) -#define PF_RQ_PENDING (1 << 8) -#define GDS_DMA_RQ_PENDING (1 << 9) -#define GRBM_EE_BUSY (1 << 10) -#define DB_CLEAN (1 << 12) -#define CB_CLEAN (1 << 13) -#define TA_BUSY (1 << 14) -#define GDS_BUSY (1 << 15) -#define VGT_BUSY (1 << 17) -#define IA_BUSY_NO_DMA (1 << 18) -#define IA_BUSY (1 << 19) -#define SX_BUSY (1 << 20) -#define SPI_BUSY (1 << 22) -#define BCI_BUSY (1 << 23) -#define SC_BUSY (1 << 24) -#define PA_BUSY (1 << 25) -#define DB_BUSY (1 << 26) -#define CP_COHERENCY_BUSY (1 << 28) -#define CP_BUSY (1 << 29) -#define CB_BUSY (1 << 30) -#define GUI_ACTIVE (1 << 31) -#define GRBM_STATUS_SE0 0x2005 -#define GRBM_STATUS_SE1 0x2006 -#define SE_DB_CLEAN (1 << 1) -#define SE_CB_CLEAN (1 << 2) -#define SE_BCI_BUSY (1 << 22) -#define SE_VGT_BUSY (1 << 23) -#define SE_PA_BUSY (1 << 24) -#define SE_TA_BUSY (1 << 25) -#define SE_SX_BUSY (1 << 26) -#define SE_SPI_BUSY (1 << 27) -#define SE_SC_BUSY (1 << 29) -#define SE_DB_BUSY (1 << 30) -#define SE_CB_BUSY (1 << 31) - -#define GRBM_INT_CNTL 0x2018 -# define RDERR_INT_ENABLE (1 << 0) -# define GUI_IDLE_INT_ENABLE (1 << 19) - -#define CP_STRMOUT_CNTL 0x213F -#define SCRATCH_REG0 0x2140 -#define SCRATCH_REG1 0x2141 -#define SCRATCH_REG2 0x2142 -#define SCRATCH_REG3 0x2143 -#define SCRATCH_REG4 0x2144 -#define SCRATCH_REG5 0x2145 -#define SCRATCH_REG6 0x2146 -#define SCRATCH_REG7 0x2147 - -#define SCRATCH_UMSK 0x2150 -#define SCRATCH_ADDR 0x2151 - -#define CP_SEM_WAIT_TIMER 0x216F - -#define CP_SEM_INCOMPLETE_TIMER_CNTL 0x2172 - -#define CP_ME_CNTL 0x21B6 -#define CP_CE_HALT (1 << 24) -#define CP_PFP_HALT (1 << 26) -#define CP_ME_HALT (1 << 28) - -#define CP_COHER_CNTL2 0x217A - -#define CP_RB2_RPTR 0x21BE -#define CP_RB1_RPTR 0x21BF -#define CP_RB0_RPTR 0x21C0 -#define CP_RB_WPTR_DELAY 0x21C1 - -#define CP_QUEUE_THRESHOLDS 0x21D8 -#define ROQ_IB1_START(x) ((x) << 0) -#define ROQ_IB2_START(x) ((x) << 8) -#define CP_MEQ_THRESHOLDS 0x21D9 -#define MEQ1_START(x) ((x) << 0) -#define MEQ2_START(x) ((x) << 8) - -#define CP_PERFMON_CNTL 0x21FF - #define VGT_VTX_VECT_EJECT_REG 0x222C - #define VGT_ESGS_RING_SIZE 0x2232 #define VGT_GSVS_RING_SIZE 0x2233 - #define VGT_GS_VERTEX_REUSE 0x2235 - #define VGT_PRIMITIVE_TYPE 0x2256 #define VGT_INDEX_TYPE 0x2257 - #define VGT_NUM_INDICES 0x225C #define VGT_NUM_INSTANCES 0x225D - #define VGT_TF_RING_SIZE 0x2262 - #define VGT_HS_OFFCHIP_PARAM 0x226C - #define VGT_TF_MEMORY_BASE 0x226E -#define PA_CL_ENHANCE 0x2285 -#define CLIP_VTX_REORDER_ENA (1 << 0) -#define NUM_CLIP_SEQ(x) ((x) << 1) - -#define PA_SU_LINE_STIPPLE_VALUE 0x2298 - -#define PA_SC_LINE_STIPPLE_STATE 0x22C4 - -#define PA_SC_FORCE_EOV_MAX_CNTS 0x22C9 -#define FORCE_EOV_MAX_CLK_CNT(x) ((x) << 0) -#define FORCE_EOV_MAX_REZ_CNT(x) ((x) << 16) - -#define PA_SC_FIFO_SIZE 0x22F3 -#define SC_FRONTEND_PRIM_FIFO_SIZE(x) ((x) << 0) -#define SC_BACKEND_PRIM_FIFO_SIZE(x) ((x) << 6) -#define SC_HIZ_TILE_FIFO_SIZE(x) ((x) << 15) -#define SC_EARLYZ_TILE_FIFO_SIZE(x) ((x) << 23) - #define PA_SC_ENHANCE 0x22FC -#define SQ_CONFIG 0x2300 - -#define SQC_CACHES 0x2302 - -#define SQ_POWER_THROTTLE 0x2396 -#define MIN_POWER(x) ((x) << 0) -#define MIN_POWER_MASK (0x3fff << 0) -#define MIN_POWER_SHIFT 0 -#define MAX_POWER(x) ((x) << 16) -#define MAX_POWER_MASK (0x3fff << 16) -#define MAX_POWER_SHIFT 0 -#define 
SQ_POWER_THROTTLE2 0x2397 -#define MAX_POWER_DELTA(x) ((x) << 0) -#define MAX_POWER_DELTA_MASK (0x3fff << 0) -#define MAX_POWER_DELTA_SHIFT 0 -#define STI_SIZE(x) ((x) << 16) -#define STI_SIZE_MASK (0x3ff << 16) -#define STI_SIZE_SHIFT 16 -#define LTI_RATIO(x) ((x) << 27) -#define LTI_RATIO_MASK (0xf << 27) -#define LTI_RATIO_SHIFT 27 - -#define SX_DEBUG_1 0x2418 - -#define SPI_STATIC_THREAD_MGMT_1 0x2438 -#define SPI_STATIC_THREAD_MGMT_2 0x2439 -#define SPI_STATIC_THREAD_MGMT_3 0x243A -#define SPI_PS_MAX_WAVE_ID 0x243B - -#define SPI_CONFIG_CNTL 0x2440 - -#define SPI_CONFIG_CNTL_1 0x244F -#define VTX_DONE_DELAY(x) ((x) << 0) -#define INTERP_ONE_PRIM_PER_ROW (1 << 4) - -#define CGTS_TCC_DISABLE 0x2452 -#define CGTS_USER_TCC_DISABLE 0x2453 -#define TCC_DISABLE_MASK 0xFFFF0000 -#define TCC_DISABLE_SHIFT 16 -#define CGTS_SM_CTRL_REG 0x2454 -#define OVERRIDE (1 << 21) -#define LS_OVERRIDE (1 << 22) - -#define SPI_LB_CU_MASK 0x24D5 - #define TA_CNTL_AUX 0x2542 -#define CC_RB_BACKEND_DISABLE 0x263D -#define BACKEND_DISABLE(x) ((x) << 16) -#define GB_ADDR_CONFIG 0x263E -#define NUM_PIPES(x) ((x) << 0) -#define NUM_PIPES_MASK 0x00000007 -#define NUM_PIPES_SHIFT 0 -#define PIPE_INTERLEAVE_SIZE(x) ((x) << 4) -#define PIPE_INTERLEAVE_SIZE_MASK 0x00000070 -#define PIPE_INTERLEAVE_SIZE_SHIFT 4 -#define NUM_SHADER_ENGINES(x) ((x) << 12) -#define NUM_SHADER_ENGINES_MASK 0x00003000 -#define NUM_SHADER_ENGINES_SHIFT 12 -#define SHADER_ENGINE_TILE_SIZE(x) ((x) << 16) -#define SHADER_ENGINE_TILE_SIZE_MASK 0x00070000 -#define SHADER_ENGINE_TILE_SIZE_SHIFT 16 -#define NUM_GPUS(x) ((x) << 20) -#define NUM_GPUS_MASK 0x00700000 -#define NUM_GPUS_SHIFT 20 -#define MULTI_GPU_TILE_SIZE(x) ((x) << 24) -#define MULTI_GPU_TILE_SIZE_MASK 0x03000000 -#define MULTI_GPU_TILE_SIZE_SHIFT 24 -#define ROW_SIZE(x) ((x) << 28) -#define ROW_SIZE_MASK 0x30000000 -#define ROW_SIZE_SHIFT 28 - -#define CB_PERFCOUNTER0_SELECT0 0x2688 -#define CB_PERFCOUNTER0_SELECT1 0x2689 -#define CB_PERFCOUNTER1_SELECT0 0x268A -#define CB_PERFCOUNTER1_SELECT1 0x268B -#define CB_PERFCOUNTER2_SELECT0 0x268C -#define CB_PERFCOUNTER2_SELECT1 0x268D -#define CB_PERFCOUNTER3_SELECT0 0x268E -#define CB_PERFCOUNTER3_SELECT1 0x268F - -#define CB_CGTT_SCLK_CTRL 0x2698 - -#define TCP_CHAN_STEER_LO 0x2B03 -#define TCP_CHAN_STEER_HI 0x2B94 - -#define CP_RB0_BASE 0x3040 -#define CP_RB0_CNTL 0x3041 -#define RB_BUFSZ(x) ((x) << 0) -#define RB_BLKSZ(x) ((x) << 8) -#define BUF_SWAP_32BIT (2 << 16) -#define RB_NO_UPDATE (1 << 27) -#define RB_RPTR_WR_ENA (1 << 31) - -#define CP_RB0_RPTR_ADDR 0x3043 -#define CP_RB0_RPTR_ADDR_HI 0x3044 -#define CP_RB0_WPTR 0x3045 - -#define CP_PFP_UCODE_ADDR 0x3054 -#define CP_PFP_UCODE_DATA 0x3055 -#define CP_ME_RAM_RADDR 0x3056 -#define CP_ME_RAM_WADDR 0x3057 -#define CP_ME_RAM_DATA 0x3058 - -#define CP_CE_UCODE_ADDR 0x305A -#define CP_CE_UCODE_DATA 0x305B - -#define CP_RB1_BASE 0x3060 -#define CP_RB1_CNTL 0x3061 -#define CP_RB1_RPTR_ADDR 0x3062 -#define CP_RB1_RPTR_ADDR_HI 0x3063 -#define CP_RB1_WPTR 0x3064 -#define CP_RB2_BASE 0x3065 -#define CP_RB2_CNTL 0x3066 -#define CP_RB2_RPTR_ADDR 0x3067 -#define CP_RB2_RPTR_ADDR_HI 0x3068 -#define CP_RB2_WPTR 0x3069 -#define CP_INT_CNTL_RING0 0x306A -#define CP_INT_CNTL_RING1 0x306B -#define CP_INT_CNTL_RING2 0x306C -# define CNTX_BUSY_INT_ENABLE (1 << 19) -# define CNTX_EMPTY_INT_ENABLE (1 << 20) -# define WAIT_MEM_SEM_INT_ENABLE (1 << 21) -# define TIME_STAMP_INT_ENABLE (1 << 26) -# define CP_RINGID2_INT_ENABLE (1 << 29) -# define CP_RINGID1_INT_ENABLE (1 << 30) -# define 
CP_RINGID0_INT_ENABLE (1 << 31) -#define CP_INT_STATUS_RING0 0x306D -#define CP_INT_STATUS_RING1 0x306E -#define CP_INT_STATUS_RING2 0x306F -# define WAIT_MEM_SEM_INT_STAT (1 << 21) -# define TIME_STAMP_INT_STAT (1 << 26) -# define CP_RINGID2_INT_STAT (1 << 29) -# define CP_RINGID1_INT_STAT (1 << 30) -# define CP_RINGID0_INT_STAT (1 << 31) - // #define PA_SC_RASTER_CONFIG 0xA0D4 # define RB_XSEL2(x) ((x) << 4) # define RB_XSEL2_MASK (0x3 << 4) @@ -1185,171 +311,14 @@ # define SE_YSEL(x) ((x) << 28) # define SE_YSEL_MASK (0x3 << 28) -/* PIF PHY0 registers idx/data 0x8/0xc */ -#define PB0_PIF_CNTL 0x10 -# define LS2_EXIT_TIME(x) ((x) << 17) -# define LS2_EXIT_TIME_MASK (0x7 << 17) -# define LS2_EXIT_TIME_SHIFT 17 -#define PB0_PIF_PAIRING 0x11 -# define MULTI_PIF (1 << 25) -#define PB0_PIF_PWRDOWN_0 0x12 -# define PLL_POWER_STATE_IN_TXS2_0(x) ((x) << 7) -# define PLL_POWER_STATE_IN_TXS2_0_MASK (0x7 << 7) -# define PLL_POWER_STATE_IN_TXS2_0_SHIFT 7 -# define PLL_POWER_STATE_IN_OFF_0(x) ((x) << 10) -# define PLL_POWER_STATE_IN_OFF_0_MASK (0x7 << 10) -# define PLL_POWER_STATE_IN_OFF_0_SHIFT 10 -# define PLL_RAMP_UP_TIME_0(x) ((x) << 24) -# define PLL_RAMP_UP_TIME_0_MASK (0x7 << 24) -# define PLL_RAMP_UP_TIME_0_SHIFT 24 -#define PB0_PIF_PWRDOWN_1 0x13 -# define PLL_POWER_STATE_IN_TXS2_1(x) ((x) << 7) -# define PLL_POWER_STATE_IN_TXS2_1_MASK (0x7 << 7) -# define PLL_POWER_STATE_IN_TXS2_1_SHIFT 7 -# define PLL_POWER_STATE_IN_OFF_1(x) ((x) << 10) -# define PLL_POWER_STATE_IN_OFF_1_MASK (0x7 << 10) -# define PLL_POWER_STATE_IN_OFF_1_SHIFT 10 -# define PLL_RAMP_UP_TIME_1(x) ((x) << 24) -# define PLL_RAMP_UP_TIME_1_MASK (0x7 << 24) -# define PLL_RAMP_UP_TIME_1_SHIFT 24 - -#define PB0_PIF_PWRDOWN_2 0x17 -# define PLL_POWER_STATE_IN_TXS2_2(x) ((x) << 7) -# define PLL_POWER_STATE_IN_TXS2_2_MASK (0x7 << 7) -# define PLL_POWER_STATE_IN_TXS2_2_SHIFT 7 -# define PLL_POWER_STATE_IN_OFF_2(x) ((x) << 10) -# define PLL_POWER_STATE_IN_OFF_2_MASK (0x7 << 10) -# define PLL_POWER_STATE_IN_OFF_2_SHIFT 10 -# define PLL_RAMP_UP_TIME_2(x) ((x) << 24) -# define PLL_RAMP_UP_TIME_2_MASK (0x7 << 24) -# define PLL_RAMP_UP_TIME_2_SHIFT 24 -#define PB0_PIF_PWRDOWN_3 0x18 -# define PLL_POWER_STATE_IN_TXS2_3(x) ((x) << 7) -# define PLL_POWER_STATE_IN_TXS2_3_MASK (0x7 << 7) -# define PLL_POWER_STATE_IN_TXS2_3_SHIFT 7 -# define PLL_POWER_STATE_IN_OFF_3(x) ((x) << 10) -# define PLL_POWER_STATE_IN_OFF_3_MASK (0x7 << 10) -# define PLL_POWER_STATE_IN_OFF_3_SHIFT 10 -# define PLL_RAMP_UP_TIME_3(x) ((x) << 24) -# define PLL_RAMP_UP_TIME_3_MASK (0x7 << 24) -# define PLL_RAMP_UP_TIME_3_SHIFT 24 -/* PIF PHY1 registers idx/data 0x10/0x14 */ -#define PB1_PIF_CNTL 0x10 -#define PB1_PIF_PAIRING 0x11 -#define PB1_PIF_PWRDOWN_0 0x12 -#define PB1_PIF_PWRDOWN_1 0x13 - -#define PB1_PIF_PWRDOWN_2 0x17 -#define PB1_PIF_PWRDOWN_3 0x18 -/* PCIE registers idx/data 0x30/0x34 */ -#define PCIE_CNTL2 0x1c /* PCIE */ -# define SLV_MEM_LS_EN (1 << 16) -# define SLV_MEM_AGGRESSIVE_LS_EN (1 << 17) -# define MST_MEM_LS_EN (1 << 18) -# define REPLAY_MEM_LS_EN (1 << 19) -#define PCIE_LC_STATUS1 0x28 /* PCIE */ -# define LC_REVERSE_RCVR (1 << 0) -# define LC_REVERSE_XMIT (1 << 1) -# define LC_OPERATING_LINK_WIDTH_MASK (0x7 << 2) -# define LC_OPERATING_LINK_WIDTH_SHIFT 2 -# define LC_DETECTED_LINK_WIDTH_MASK (0x7 << 5) -# define LC_DETECTED_LINK_WIDTH_SHIFT 5 - -#define PCIE_P_CNTL 0x40 /* PCIE */ -# define P_IGNORE_EDB_ERR (1 << 6) - /* PCIE PORT registers idx/data 0x38/0x3c */ -#define PCIE_LC_CNTL 0xa0 -# define LC_L0S_INACTIVITY(x) ((x) << 8) -# define 
LC_L0S_INACTIVITY_MASK (0xf << 8) -# define LC_L0S_INACTIVITY_SHIFT 8 -# define LC_L1_INACTIVITY(x) ((x) << 12) -# define LC_L1_INACTIVITY_MASK (0xf << 12) -# define LC_L1_INACTIVITY_SHIFT 12 -# define LC_PMI_TO_L1_DIS (1 << 16) -# define LC_ASPM_TO_L1_DIS (1 << 24) -#define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */ -# define LC_LINK_WIDTH_SHIFT 0 -# define LC_LINK_WIDTH_MASK 0x7 +// #define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */ # define LC_LINK_WIDTH_X0 0 # define LC_LINK_WIDTH_X1 1 # define LC_LINK_WIDTH_X2 2 # define LC_LINK_WIDTH_X4 3 # define LC_LINK_WIDTH_X8 4 # define LC_LINK_WIDTH_X16 6 -# define LC_LINK_WIDTH_RD_SHIFT 4 -# define LC_LINK_WIDTH_RD_MASK 0x70 -# define LC_RECONFIG_ARC_MISSING_ESCAPE (1 << 7) -# define LC_RECONFIG_NOW (1 << 8) -# define LC_RENEGOTIATION_SUPPORT (1 << 9) -# define LC_RENEGOTIATE_EN (1 << 10) -# define LC_SHORT_RECONFIG_EN (1 << 11) -# define LC_UPCONFIGURE_SUPPORT (1 << 12) -# define LC_UPCONFIGURE_DIS (1 << 13) -# define LC_DYN_LANES_PWR_STATE(x) ((x) << 21) -# define LC_DYN_LANES_PWR_STATE_MASK (0x3 << 21) -# define LC_DYN_LANES_PWR_STATE_SHIFT 21 -#define PCIE_LC_N_FTS_CNTL 0xa3 /* PCIE_P */ -# define LC_XMIT_N_FTS(x) ((x) << 0) -# define LC_XMIT_N_FTS_MASK (0xff << 0) -# define LC_XMIT_N_FTS_SHIFT 0 -# define LC_XMIT_N_FTS_OVERRIDE_EN (1 << 8) -# define LC_N_FTS_MASK (0xff << 24) -#define PCIE_LC_SPEED_CNTL 0xa4 /* PCIE_P */ -# define LC_GEN2_EN_STRAP (1 << 0) -# define LC_GEN3_EN_STRAP (1 << 1) -# define LC_TARGET_LINK_SPEED_OVERRIDE_EN (1 << 2) -# define LC_TARGET_LINK_SPEED_OVERRIDE_MASK (0x3 << 3) -# define LC_TARGET_LINK_SPEED_OVERRIDE_SHIFT 3 -# define LC_FORCE_EN_SW_SPEED_CHANGE (1 << 5) -# define LC_FORCE_DIS_SW_SPEED_CHANGE (1 << 6) -# define LC_FORCE_EN_HW_SPEED_CHANGE (1 << 7) -# define LC_FORCE_DIS_HW_SPEED_CHANGE (1 << 8) -# define LC_INITIATE_LINK_SPEED_CHANGE (1 << 9) -# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_MASK (0x3 << 10) -# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_SHIFT 10 -# define LC_CURRENT_DATA_RATE_MASK (0x3 << 13) /* 0/1/2 = gen1/2/3 */ -# define LC_CURRENT_DATA_RATE_SHIFT 13 -# define LC_CLR_FAILED_SPD_CHANGE_CNT (1 << 16) -# define LC_OTHER_SIDE_EVER_SENT_GEN2 (1 << 18) -# define LC_OTHER_SIDE_SUPPORTS_GEN2 (1 << 19) -# define LC_OTHER_SIDE_EVER_SENT_GEN3 (1 << 20) -# define LC_OTHER_SIDE_SUPPORTS_GEN3 (1 << 21) - -#define PCIE_LC_CNTL2 0xb1 -# define LC_ALLOW_PDWN_IN_L1 (1 << 17) -# define LC_ALLOW_PDWN_IN_L23 (1 << 18) - -#define PCIE_LC_CNTL3 0xb5 /* PCIE_P */ -# define LC_GO_TO_RECOVERY (1 << 30) -#define PCIE_LC_CNTL4 0xb6 /* PCIE_P */ -# define LC_REDO_EQ (1 << 5) -# define LC_SET_QUIESCE (1 << 13) - -/* - * UVD - */ -#define UVD_UDEC_ADDR_CONFIG 0x3bd3 -#define UVD_UDEC_DB_ADDR_CONFIG 0x3bd4 -#define UVD_UDEC_DBW_ADDR_CONFIG 0x3bd5 -#define UVD_RBC_RB_RPTR 0x3da4 -#define UVD_RBC_RB_WPTR 0x3da5 -#define UVD_STATUS 0x3daf - -#define UVD_CGC_CTRL 0x3dc2 -# define DCM (1 << 0) -# define CG_DT(x) ((x) << 2) -# define CG_DT_MASK (0xf << 2) -# define CLK_OD(x) ((x) << 6) -# define CLK_OD_MASK (0x1f << 6) - - /* UVD CTX indirect */ -#define UVD_CGC_MEM_CTRL 0xC0 -#define UVD_CGC_CTRL2 0xC1 -# define DYN_OR_EN (1 << 0) -# define DYN_RR_EN (1 << 1) -# define G_DIV_ID(x) ((x) << 2) -# define G_DIV_ID_MASK (0x7 << 2) /* * PM4 @@ -1583,45 +552,7 @@ /* ASYNC DMA - first instance at 0xd000, second at 0xd800 */ #define DMA0_REGISTER_OFFSET 0x0 /* not a register */ #define DMA1_REGISTER_OFFSET 0x200 /* not a register */ - -#define DMA_RB_CNTL 0x3400 -# define DMA_RB_ENABLE (1 << 0) -# define DMA_RB_SIZE(x) ((x) << 1) /* log2 
*/ -# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */ -# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12) -# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */ -# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */ -#define DMA_RB_BASE 0x3401 -#define DMA_RB_RPTR 0x3402 -#define DMA_RB_WPTR 0x3403 - -#define DMA_RB_RPTR_ADDR_HI 0x3407 -#define DMA_RB_RPTR_ADDR_LO 0x3408 - -#define DMA_IB_CNTL 0x3409 -# define DMA_IB_ENABLE (1 << 0) -# define DMA_IB_SWAP_ENABLE (1 << 4) -# define CMD_VMID_FORCE (1 << 31) -#define DMA_IB_RPTR 0x340a -#define DMA_CNTL 0x340b -# define TRAP_ENABLE (1 << 0) -# define SEM_INCOMPLETE_INT_ENABLE (1 << 1) -# define SEM_WAIT_INT_ENABLE (1 << 2) -# define DATA_SWAP_ENABLE (1 << 3) -# define FENCE_SWAP_ENABLE (1 << 4) -# define CTXEMPTY_INT_ENABLE (1 << 28) -#define DMA_STATUS_REG 0x340d -# define DMA_IDLE (1 << 0) -#define DMA_TILING_CONFIG 0x342e - -#define DMA_POWER_CNTL 0x342f -# define MEM_POWER_OVERRIDE (1 << 8) -#define DMA_CLK_CTRL 0x3430 - -#define DMA_PG 0x3435 -# define PG_CNTL_ENABLE (1 << 0) -#define DMA_PGFSM_CONFIG 0x3436 -#define DMA_PGFSM_WRITE 0x3437 +#define SDMA_MAX_INSTANCE 2 #define DMA_PACKET(cmd, b, t, s, n) ((((cmd) & 0xF) << 28) | \ (((b) & 0x1) << 26) | \ @@ -1650,6 +581,7 @@ #define DMA_PACKET_POLL_REG_MEM 0xe #define DMA_PACKET_NOP 0xf +/* VCE */ #define VCE_STATUS 0x20004 #define VCE_VCPU_CNTL 0x20014 #define VCE_CLK_EN (1 << 0) @@ -1726,378 +658,118 @@ #define CURSOR_WIDTH 64 #define CURSOR_HEIGHT 64 -#define AMDGPU_MM_INDEX 0x0000 -#define AMDGPU_MM_DATA 0x0001 - -#define VERDE_NUM_CRTC 6 -#define BLACKOUT_MODE_MASK 0x00000007 -#define VGA_RENDER_CONTROL 0xC0 -#define R_000300_VGA_RENDER_CONTROL 0xC0 -#define C_000300_VGA_VSTATUS_CNTL 0xFFFCFFFF -#define EVERGREEN_CRTC_STATUS 0x1BA3 -#define EVERGREEN_CRTC_V_BLANK (1 << 0) -#define EVERGREEN_CRTC_STATUS_POSITION 0x1BA4 -/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */ -#define EVERGREEN_CRTC_V_BLANK_START_END 0x1b8d -#define EVERGREEN_CRTC_CONTROL 0x1b9c -#define EVERGREEN_CRTC_MASTER_EN (1 << 0) -#define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24) -#define EVERGREEN_CRTC_BLANK_CONTROL 0x1b9d -#define EVERGREEN_CRTC_BLANK_DATA_EN (1 << 8) -#define EVERGREEN_CRTC_V_BLANK (1 << 0) -#define EVERGREEN_CRTC_STATUS_HV_COUNT 0x1ba8 -#define EVERGREEN_CRTC_UPDATE_LOCK 0x1bb5 -#define EVERGREEN_MASTER_UPDATE_LOCK 0x1bbd -#define EVERGREEN_MASTER_UPDATE_MODE 0x1bbe -#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16) -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08 -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS 0x1a04 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS 0x1a05 -#define EVERGREEN_GRPH_UPDATE 0x1a11 -#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0xc4 -#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0xc9 -#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2) - -#define EVERGREEN_DATA_FORMAT 0x1ac0 -# define EVERGREEN_INTERLEAVE_EN (1 << 0) - -#define R600_D1GRPH_ARRAY_MODE_LINEAR_GENERAL (0 << 20) -#define R600_D1GRPH_ARRAY_MODE_LINEAR_ALIGNED (1 << 20) -#define R600_D1GRPH_ARRAY_MODE_1D_TILED_THIN1 (2 << 20) -#define R600_D1GRPH_ARRAY_MODE_2D_TILED_THIN1 (4 << 20) - -#define R700_D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a45 -#define R700_D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1845 - -#define R700_D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1847 -#define R700_D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a47 - -#define R600_D1GRPH_SWAP_CONTROL 0x1843 -#define R600_D1GRPH_SWAP_ENDIAN_NONE (0 << 0) 
-#define R600_D1GRPH_SWAP_ENDIAN_16BIT (1 << 0) -#define R600_D1GRPH_SWAP_ENDIAN_32BIT (2 << 0) -#define R600_D1GRPH_SWAP_ENDIAN_64BIT (3 << 0) - -#define AVIVO_D1VGA_CONTROL 0x00cc -# define AVIVO_DVGA_CONTROL_MODE_ENABLE (1 << 0) -# define AVIVO_DVGA_CONTROL_TIMING_SELECT (1 << 8) -# define AVIVO_DVGA_CONTROL_SYNC_POLARITY_SELECT (1 << 9) -# define AVIVO_DVGA_CONTROL_OVERSCAN_TIMING_SELECT (1 << 10) -# define AVIVO_DVGA_CONTROL_OVERSCAN_COLOR_EN (1 << 16) -# define AVIVO_DVGA_CONTROL_ROTATE (1 << 24) -#define AVIVO_D2VGA_CONTROL 0x00ce - -#define R600_BUS_CNTL 0x1508 -# define R600_BIOS_ROM_DIS (1 << 1) + #define R600_ROM_CNTL 0x580 # define R600_SCK_OVERWRITE (1 << 1) # define R600_SCK_PRESCALE_CRYSTAL_CLK_SHIFT 28 # define R600_SCK_PRESCALE_CRYSTAL_CLK_MASK (0xf << 28) -#define FMT_BIT_DEPTH_CONTROL 0x1bf2 -#define FMT_TRUNCATE_EN (1 << 0) -#define FMT_TRUNCATE_DEPTH (1 << 4) -#define FMT_SPATIAL_DITHER_EN (1 << 8) -#define FMT_SPATIAL_DITHER_MODE(x) ((x) << 9) -#define FMT_SPATIAL_DITHER_DEPTH (1 << 12) -#define FMT_FRAME_RANDOM_ENABLE (1 << 13) -#define FMT_RGB_RANDOM_ENABLE (1 << 14) -#define FMT_HIGHPASS_RANDOM_ENABLE (1 << 15) -#define FMT_TEMPORAL_DITHER_EN (1 << 16) -#define FMT_TEMPORAL_DITHER_DEPTH (1 << 20) -#define FMT_TEMPORAL_DITHER_OFFSET(x) ((x) << 21) -#define FMT_TEMPORAL_LEVEL (1 << 24) -#define FMT_TEMPORAL_DITHER_RESET (1 << 25) -#define FMT_25FRC_SEL(x) ((x) << 26) -#define FMT_50FRC_SEL(x) ((x) << 28) -#define FMT_75FRC_SEL(x) ((x) << 30) - -#define EVERGREEN_DC_LUT_CONTROL 0x1a80 -#define EVERGREEN_DC_LUT_BLACK_OFFSET_BLUE 0x1a81 -#define EVERGREEN_DC_LUT_BLACK_OFFSET_GREEN 0x1a82 -#define EVERGREEN_DC_LUT_BLACK_OFFSET_RED 0x1a83 -#define EVERGREEN_DC_LUT_WHITE_OFFSET_BLUE 0x1a84 -#define EVERGREEN_DC_LUT_WHITE_OFFSET_GREEN 0x1a85 -#define EVERGREEN_DC_LUT_WHITE_OFFSET_RED 0x1a86 -#define EVERGREEN_DC_LUT_30_COLOR 0x1a7c -#define EVERGREEN_DC_LUT_RW_INDEX 0x1a79 -#define EVERGREEN_DC_LUT_WRITE_EN_MASK 0x1a7e -#define EVERGREEN_DC_LUT_RW_MODE 0x1a78 - -#define EVERGREEN_GRPH_ENABLE 0x1a00 -#define EVERGREEN_GRPH_CONTROL 0x1a01 -#define EVERGREEN_GRPH_DEPTH(x) (((x) & 0x3) << 0) -#define EVERGREEN_GRPH_DEPTH_8BPP 0 -#define EVERGREEN_GRPH_DEPTH_16BPP 1 -#define EVERGREEN_GRPH_DEPTH_32BPP 2 -#define EVERGREEN_GRPH_NUM_BANKS(x) (((x) & 0x3) << 2) -#define EVERGREEN_ADDR_SURF_2_BANK 0 -#define EVERGREEN_ADDR_SURF_4_BANK 1 -#define EVERGREEN_ADDR_SURF_8_BANK 2 -#define EVERGREEN_ADDR_SURF_16_BANK 3 -#define EVERGREEN_GRPH_Z(x) (((x) & 0x3) << 4) -#define EVERGREEN_GRPH_BANK_WIDTH(x) (((x) & 0x3) << 6) -#define EVERGREEN_ADDR_SURF_BANK_WIDTH_1 0 -#define EVERGREEN_ADDR_SURF_BANK_WIDTH_2 1 -#define EVERGREEN_ADDR_SURF_BANK_WIDTH_4 2 -#define EVERGREEN_ADDR_SURF_BANK_WIDTH_8 3 -#define EVERGREEN_GRPH_FORMAT(x) (((x) & 0x7) << 8) - -#define EVERGREEN_GRPH_FORMAT_INDEXED 0 -#define EVERGREEN_GRPH_FORMAT_ARGB1555 0 -#define EVERGREEN_GRPH_FORMAT_ARGB565 1 -#define EVERGREEN_GRPH_FORMAT_ARGB4444 2 -#define EVERGREEN_GRPH_FORMAT_AI88 3 -#define EVERGREEN_GRPH_FORMAT_MONO16 4 -#define EVERGREEN_GRPH_FORMAT_BGRA5551 5 +#define GRPH_ARRAY_LINEAR_GENERAL 0 +#define GRPH_ARRAY_LINEAR_ALIGNED 1 +#define GRPH_ARRAY_1D_TILED_THIN1 2 +#define GRPH_ARRAY_2D_TILED_THIN1 4 + +#define ES_AND_GS_AUTO 3 +#define BUF_SWAP_32BIT (2 << 16) + +#define GRPH_DEPTH_8BPP 0 +#define GRPH_DEPTH_16BPP 1 +#define GRPH_DEPTH_32BPP 2 + +/* 8 BPP */ +#define GRPH_FORMAT_INDEXED 0 + +/* 16 BPP */ +#define GRPH_FORMAT_ARGB1555 0 +#define GRPH_FORMAT_ARGB565 1 +#define GRPH_FORMAT_ARGB4444 2 +#define 
GRPH_FORMAT_AI88 3 +#define GRPH_FORMAT_MONO16 4 +#define GRPH_FORMAT_BGRA5551 5 /* 32 BPP */ -#define EVERGREEN_GRPH_FORMAT_ARGB8888 0 -#define EVERGREEN_GRPH_FORMAT_ARGB2101010 1 -#define EVERGREEN_GRPH_FORMAT_32BPP_DIG 2 -#define EVERGREEN_GRPH_FORMAT_8B_ARGB2101010 3 -#define EVERGREEN_GRPH_FORMAT_BGRA1010102 4 -#define EVERGREEN_GRPH_FORMAT_8B_BGRA1010102 5 -#define EVERGREEN_GRPH_FORMAT_RGB111110 6 -#define EVERGREEN_GRPH_FORMAT_BGR101111 7 -#define EVERGREEN_GRPH_BANK_HEIGHT(x) (((x) & 0x3) << 11) -#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_1 0 -#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_2 1 -#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_4 2 -#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_8 3 -#define EVERGREEN_GRPH_TILE_SPLIT(x) (((x) & 0x7) << 13) -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_64B 0 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_128B 1 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_256B 2 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_512B 3 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_1KB 4 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_2KB 5 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_4KB 6 -#define EVERGREEN_GRPH_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 18) -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_1 0 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_2 1 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_4 2 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_8 3 -#define EVERGREEN_GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20) -#define EVERGREEN_GRPH_ARRAY_LINEAR_GENERAL 0 -#define EVERGREEN_GRPH_ARRAY_LINEAR_ALIGNED 1 -#define EVERGREEN_GRPH_ARRAY_1D_TILED_THIN1 2 -#define EVERGREEN_GRPH_ARRAY_2D_TILED_THIN1 4 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_1 0 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_2 1 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_4 2 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_8 3 - -#define EVERGREEN_GRPH_SWAP_CONTROL 0x1a03 -#define EVERGREEN_GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0) -# define EVERGREEN_GRPH_ENDIAN_NONE 0 -# define EVERGREEN_GRPH_ENDIAN_8IN16 1 -# define EVERGREEN_GRPH_ENDIAN_8IN32 2 -# define EVERGREEN_GRPH_ENDIAN_8IN64 3 -#define EVERGREEN_GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4) -# define EVERGREEN_GRPH_RED_SEL_R 0 -# define EVERGREEN_GRPH_RED_SEL_G 1 -# define EVERGREEN_GRPH_RED_SEL_B 2 -# define EVERGREEN_GRPH_RED_SEL_A 3 -#define EVERGREEN_GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6) -# define EVERGREEN_GRPH_GREEN_SEL_G 0 -# define EVERGREEN_GRPH_GREEN_SEL_B 1 -# define EVERGREEN_GRPH_GREEN_SEL_A 2 -# define EVERGREEN_GRPH_GREEN_SEL_R 3 -#define EVERGREEN_GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8) -# define EVERGREEN_GRPH_BLUE_SEL_B 0 -# define EVERGREEN_GRPH_BLUE_SEL_A 1 -# define EVERGREEN_GRPH_BLUE_SEL_R 2 -# define EVERGREEN_GRPH_BLUE_SEL_G 3 -#define EVERGREEN_GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10) -# define EVERGREEN_GRPH_ALPHA_SEL_A 0 -# define EVERGREEN_GRPH_ALPHA_SEL_R 1 -# define EVERGREEN_GRPH_ALPHA_SEL_G 2 -# define EVERGREEN_GRPH_ALPHA_SEL_B 3 - -#define EVERGREEN_D3VGA_CONTROL 0xf8 -#define EVERGREEN_D4VGA_CONTROL 0xf9 -#define EVERGREEN_D5VGA_CONTROL 0xfa -#define EVERGREEN_D6VGA_CONTROL 0xfb - -#define EVERGREEN_GRPH_SURFACE_ADDRESS_MASK 0xffffff00 - -#define EVERGREEN_GRPH_LUT_10BIT_BYPASS_CONTROL 0x1a02 -#define EVERGREEN_LUT_10BIT_BYPASS_EN (1 << 8) - -#define EVERGREEN_GRPH_PITCH 0x1a06 -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08 -#define EVERGREEN_GRPH_SURFACE_OFFSET_X 0x1a09 -#define EVERGREEN_GRPH_SURFACE_OFFSET_Y 0x1a0a -#define EVERGREEN_GRPH_X_START 0x1a0b -#define EVERGREEN_GRPH_Y_START 
0x1a0c -#define EVERGREEN_GRPH_X_END 0x1a0d -#define EVERGREEN_GRPH_Y_END 0x1a0e -#define EVERGREEN_GRPH_UPDATE 0x1a11 -#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2) -#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16) -#define EVERGREEN_GRPH_FLIP_CONTROL 0x1a12 -#define EVERGREEN_GRPH_SURFACE_UPDATE_H_RETRACE_EN (1 << 0) - -#define EVERGREEN_VIEWPORT_START 0x1b5c -#define EVERGREEN_VIEWPORT_SIZE 0x1b5d -#define EVERGREEN_DESKTOP_HEIGHT 0x1ac1 - -/* CUR blocks at 0x6998, 0x7598, 0x10198, 0x10d98, 0x11998, 0x12598 */ -#define EVERGREEN_CUR_CONTROL 0x1a66 -# define EVERGREEN_CURSOR_EN (1 << 0) -# define EVERGREEN_CURSOR_MODE(x) (((x) & 0x3) << 8) -# define EVERGREEN_CURSOR_MONO 0 -# define EVERGREEN_CURSOR_24_1 1 -# define EVERGREEN_CURSOR_24_8_PRE_MULT 2 -# define EVERGREEN_CURSOR_24_8_UNPRE_MULT 3 -# define EVERGREEN_CURSOR_2X_MAGNIFY (1 << 16) -# define EVERGREEN_CURSOR_FORCE_MC_ON (1 << 20) -# define EVERGREEN_CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24) -# define EVERGREEN_CURSOR_URGENT_ALWAYS 0 -# define EVERGREEN_CURSOR_URGENT_1_8 1 -# define EVERGREEN_CURSOR_URGENT_1_4 2 -# define EVERGREEN_CURSOR_URGENT_3_8 3 -# define EVERGREEN_CURSOR_URGENT_1_2 4 -#define EVERGREEN_CUR_SURFACE_ADDRESS 0x1a67 -# define EVERGREEN_CUR_SURFACE_ADDRESS_MASK 0xfffff000 -#define EVERGREEN_CUR_SIZE 0x1a68 -#define EVERGREEN_CUR_SURFACE_ADDRESS_HIGH 0x1a69 -#define EVERGREEN_CUR_POSITION 0x1a6a -#define EVERGREEN_CUR_HOT_SPOT 0x1a6b -#define EVERGREEN_CUR_COLOR1 0x1a6c -#define EVERGREEN_CUR_COLOR2 0x1a6d -#define EVERGREEN_CUR_UPDATE 0x1a6e -# define EVERGREEN_CURSOR_UPDATE_PENDING (1 << 0) -# define EVERGREEN_CURSOR_UPDATE_TAKEN (1 << 1) -# define EVERGREEN_CURSOR_UPDATE_LOCK (1 << 16) -# define EVERGREEN_CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24) - - -#define NI_INPUT_CSC_CONTROL 0x1a35 -# define NI_INPUT_CSC_GRPH_MODE(x) (((x) & 0x3) << 0) -# define NI_INPUT_CSC_BYPASS 0 -# define NI_INPUT_CSC_PROG_COEFF 1 -# define NI_INPUT_CSC_PROG_SHARED_MATRIXA 2 -# define NI_INPUT_CSC_OVL_MODE(x) (((x) & 0x3) << 4) - -#define NI_OUTPUT_CSC_CONTROL 0x1a3c -# define NI_OUTPUT_CSC_GRPH_MODE(x) (((x) & 0x7) << 0) -# define NI_OUTPUT_CSC_BYPASS 0 -# define NI_OUTPUT_CSC_TV_RGB 1 -# define NI_OUTPUT_CSC_YCBCR_601 2 -# define NI_OUTPUT_CSC_YCBCR_709 3 -# define NI_OUTPUT_CSC_PROG_COEFF 4 -# define NI_OUTPUT_CSC_PROG_SHARED_MATRIXB 5 -# define NI_OUTPUT_CSC_OVL_MODE(x) (((x) & 0x7) << 4) - -#define NI_DEGAMMA_CONTROL 0x1a58 -# define NI_GRPH_DEGAMMA_MODE(x) (((x) & 0x3) << 0) -# define NI_DEGAMMA_BYPASS 0 -# define NI_DEGAMMA_SRGB_24 1 -# define NI_DEGAMMA_XVYCC_222 2 -# define NI_OVL_DEGAMMA_MODE(x) (((x) & 0x3) << 4) -# define NI_ICON_DEGAMMA_MODE(x) (((x) & 0x3) << 8) -# define NI_CURSOR_DEGAMMA_MODE(x) (((x) & 0x3) << 12) - -#define NI_GAMUT_REMAP_CONTROL 0x1a59 -# define NI_GRPH_GAMUT_REMAP_MODE(x) (((x) & 0x3) << 0) -# define NI_GAMUT_REMAP_BYPASS 0 -# define NI_GAMUT_REMAP_PROG_COEFF 1 -# define NI_GAMUT_REMAP_PROG_SHARED_MATRIXA 2 -# define NI_GAMUT_REMAP_PROG_SHARED_MATRIXB 3 -# define NI_OVL_GAMUT_REMAP_MODE(x) (((x) & 0x3) << 4) - -#define NI_REGAMMA_CONTROL 0x1aa0 -# define NI_GRPH_REGAMMA_MODE(x) (((x) & 0x7) << 0) -# define NI_REGAMMA_BYPASS 0 -# define NI_REGAMMA_SRGB_24 1 -# define NI_REGAMMA_XVYCC_222 2 -# define NI_REGAMMA_PROG_A 3 -# define NI_REGAMMA_PROG_B 4 -# define NI_OVL_REGAMMA_MODE(x) (((x) & 0x7) << 4) - - -#define NI_PRESCALE_GRPH_CONTROL 0x1a2d -# define NI_GRPH_PRESCALE_BYPASS (1 << 4) - -#define NI_PRESCALE_OVL_CONTROL 0x1a31 -# define NI_OVL_PRESCALE_BYPASS (1 << 4) - -#define 
NI_INPUT_GAMMA_CONTROL 0x1a10 -# define NI_GRPH_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 0) -# define NI_INPUT_GAMMA_USE_LUT 0 -# define NI_INPUT_GAMMA_BYPASS 1 -# define NI_INPUT_GAMMA_SRGB_24 2 -# define NI_INPUT_GAMMA_XVYCC_222 3 -# define NI_OVL_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 4) - -#define BLACKOUT_MODE_MASK 0x00000007 -#define VGA_RENDER_CONTROL 0xC0 -#define R_000300_VGA_RENDER_CONTROL 0xC0 -#define C_000300_VGA_VSTATUS_CNTL 0xFFFCFFFF -#define EVERGREEN_CRTC_STATUS 0x1BA3 -#define EVERGREEN_CRTC_V_BLANK (1 << 0) -#define EVERGREEN_CRTC_STATUS_POSITION 0x1BA4 -/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */ -#define EVERGREEN_CRTC_V_BLANK_START_END 0x1b8d -#define EVERGREEN_CRTC_CONTROL 0x1b9c -# define EVERGREEN_CRTC_MASTER_EN (1 << 0) -# define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24) -#define EVERGREEN_CRTC_BLANK_CONTROL 0x1b9d -# define EVERGREEN_CRTC_BLANK_DATA_EN (1 << 8) -# define EVERGREEN_CRTC_V_BLANK (1 << 0) -#define EVERGREEN_CRTC_STATUS_HV_COUNT 0x1ba8 -#define EVERGREEN_CRTC_UPDATE_LOCK 0x1bb5 -#define EVERGREEN_MASTER_UPDATE_LOCK 0x1bbd -#define EVERGREEN_MASTER_UPDATE_MODE 0x1bbe -#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16) -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08 -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS 0x1a04 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS 0x1a05 -#define EVERGREEN_GRPH_UPDATE 0x1a11 -#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0xc4 -#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0xc9 -#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2) - -#define mmVM_CONTEXT1_CNTL__xxRANGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x10 -#define mmVM_CONTEXT1_CNTL__xxRANGE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x4 -#define mmVM_CONTEXT1_CNTL__xxDUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x80 -#define mmVM_CONTEXT1_CNTL__xxDUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x7 -#define mmVM_CONTEXT1_CNTL__xxPDE0_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x400 -#define mmVM_CONTEXT1_CNTL__xxPDE0_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0xa -#define mmVM_CONTEXT1_CNTL__xxVALID_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x2000 -#define mmVM_CONTEXT1_CNTL__xxVALID_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0xd -#define mmVM_CONTEXT1_CNTL__xxREAD_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x10000 -#define mmVM_CONTEXT1_CNTL__xxREAD_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x10 -#define mmVM_CONTEXT1_CNTL__xxWRITE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x80000 -#define mmVM_CONTEXT1_CNTL__xxWRITE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x13 - -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxVMID_MASK 0x1e000000 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxVMID__SHIFT 0x19 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxPROTECTIONS_MASK 0xff -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxPROTECTIONS__SHIFT 0x0 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_ID_MASK 0xff000 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_ID__SHIFT 0xc -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_RW_MASK 0x1000000 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_RW__SHIFT 0x18 - -#define mmMC_SHARED_BLACKOUT_CNTL__xxBLACKOUT_MODE_MASK 0x7 -#define mmMC_SHARED_BLACKOUT_CNTL__xxBLACKOUT_MODE__SHIFT 0x0 - -#define mmBIF_FB_EN__xxFB_READ_EN_MASK 0x1 -#define mmBIF_FB_EN__xxFB_READ_EN__SHIFT 0x0 -#define mmBIF_FB_EN__xxFB_WRITE_EN_MASK 0x2 -#define mmBIF_FB_EN__xxFB_WRITE_EN__SHIFT 0x1 - -#define 
mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC_MASK 0x20000 -#define mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC__SHIFT 0x11 -#define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC_MASK 0x800 -#define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC__SHIFT 0xb +#define GRPH_FORMAT_ARGB8888 0 +#define GRPH_FORMAT_ARGB2101010 1 +#define GRPH_FORMAT_32BPP_DIG 2 +#define GRPH_FORMAT_8B_ARGB2101010 3 +#define GRPH_FORMAT_BGRA1010102 4 +#define GRPH_FORMAT_8B_BGRA1010102 5 +#define GRPH_FORMAT_RGB111110 6 +#define GRPH_FORMAT_BGR101111 7 + +#define GRPH_ENDIAN_NONE 0 +#define GRPH_ENDIAN_8IN16 1 +#define GRPH_ENDIAN_8IN32 2 +#define GRPH_ENDIAN_8IN64 3 +#define GRPH_RED_SEL_R 0 +#define GRPH_RED_SEL_G 1 +#define GRPH_RED_SEL_B 2 +#define GRPH_RED_SEL_A 3 + +#define GRPH_GREEN_SEL_G 0 +#define GRPH_GREEN_SEL_B 1 +#define GRPH_GREEN_SEL_A 2 +#define GRPH_GREEN_SEL_R 3 + +#define GRPH_BLUE_SEL_B 0 +#define GRPH_BLUE_SEL_A 1 +#define GRPH_BLUE_SEL_R 2 +#define GRPH_BLUE_SEL_G 3 + +#define GRPH_ALPHA_SEL_A 0 +#define GRPH_ALPHA_SEL_R 1 +#define GRPH_ALPHA_SEL_G 2 +#define GRPH_ALPHA_SEL_B 3 + +/* CUR_CONTROL */ + #define CURSOR_MONO 0 + #define CURSOR_24_1 1 + #define CURSOR_24_8_PRE_MULT 2 + #define CURSOR_24_8_UNPRE_MULT 3 + #define CURSOR_URGENT_ALWAYS 0 + #define CURSOR_URGENT_1_8 1 + #define CURSOR_URGENT_1_4 2 + #define CURSOR_URGENT_3_8 3 + #define CURSOR_URGENT_1_2 4 + +/* INPUT_CSC_CONTROL */ +# define INPUT_CSC_BYPASS 0 +# define INPUT_CSC_PROG_COEFF 1 +# define INPUT_CSC_PROG_SHARED_MATRIXA 2 + +/* OUTPUT_CSC_CONTROL */ +# define OUTPUT_CSC_BYPASS 0 +# define OUTPUT_CSC_TV_RGB 1 +# define OUTPUT_CSC_YCBCR_601 2 +# define OUTPUT_CSC_YCBCR_709 3 +# define OUTPUT_CSC_PROG_COEFF 4 +# define OUTPUT_CSC_PROG_SHARED_MATRIXB 5 + +/* DEGAMMA_CONTROL */ +# define DEGAMMA_BYPASS 0 +# define DEGAMMA_SRGB_24 1 +# define DEGAMMA_XVYCC_222 2 + +/* GAMUT_REMAP_CONTROL */ +# define GAMUT_REMAP_BYPASS 0 +# define GAMUT_REMAP_PROG_COEFF 1 +# define GAMUT_REMAP_PROG_SHARED_MATRIXA 2 +# define GAMUT_REMAP_PROG_SHARED_MATRIXB 3 + +/* REGAMMA_CONTROL */ +# define REGAMMA_BYPASS 0 +# define REGAMMA_SRGB_24 1 +# define REGAMMA_XVYCC_222 2 +# define REGAMMA_PROG_A 3 +# define REGAMMA_PROG_B 4 + + +/* INPUT_GAMMA_CONTROL */ +# define INPUT_GAMMA_USE_LUT 0 +# define INPUT_GAMMA_BYPASS 1 +# define INPUT_GAMMA_SRGB_24 2 +# define INPUT_GAMMA_XVYCC_222 3 #define MC_SEQ_MISC0__MT__MASK 0xf0000000 #define MC_SEQ_MISC0__MT__GDDR1 0x10000000 @@ -2113,20 +785,9 @@ #define PACKET3_SEM_SEL_SIGNAL (0x6 << 29) #define PACKET3_SEM_SEL_WAIT (0x7 << 29) -#define CONFIG_CNTL 0x1509 -#define CC_DRM_ID_STRAPS 0X1559 #define AMDGPU_PCIE_INDEX 0xc #define AMDGPU_PCIE_DATA 0xd -#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0x3411 -#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0x3412 -#define DMA_MODE 0x342f -#define DMA_RB_RPTR_ADDR_HI 0x3407 -#define DMA_RB_RPTR_ADDR_LO 0x3408 -#define DMA_BUSY_MASK 0x20 -#define DMA1_BUSY_MASK 0X40 -#define SDMA_MAX_INSTANCE 2 - #define PCIE_BUS_CLK 10000 #define TCLK (PCIE_BUS_CLK / 10) #define PCIE_PORT_INDEX 0xe @@ -2136,8 +797,6 @@ #define EVERGREEN_PIF_PHY1_INDEX 0x10 #define EVERGREEN_PIF_PHY1_DATA 0x14 -#define MC_VM_FB_OFFSET 0x81a - /* Discrete VCE clocks */ #define CG_VCEPLL_FUNC_CNTL 0xc0030600 #define VCEPLL_RESET_MASK 0x00000001 diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 659eab9b90be..c457be3a3c56 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -584,6 +584,8 @@ soc15_asic_reset_method(struct amdgpu_device *adev) * Enable triggering of GPU reset only if 
specified * by module parameter. */ + if (adev->pcie_reset_ctx.in_link_reset) + return AMD_RESET_METHOD_LINK; if (amdgpu_gpu_recovery == 4 || amdgpu_gpu_recovery == 5) return AMD_RESET_METHOD_MODE2; else if (!(adev->flags & AMD_IS_APU)) @@ -640,6 +642,9 @@ asic_reset: case AMD_RESET_METHOD_MODE2: dev_info(adev->dev, "MODE2 reset\n"); return amdgpu_dpm_mode2_reset(adev); + case AMD_RESET_METHOD_LINK: + dev_info(adev->dev, "Link reset\n"); + return amdgpu_device_link_reset(adev); default: dev_info(adev->dev, "MODE1 reset\n"); return amdgpu_device_mode1_reset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index a5000c171c02..cf93fa477674 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -552,6 +552,11 @@ # define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2) # define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) +#define PACKET3_RUN_CLEANER_SHADER_9_0 0xD7 +/* 1. header + * 2. RESERVED [31:0] + */ + #define PACKET3_RUN_CLEANER_SHADER 0xD2 /* 1. header * 2. RESERVED [31:0] diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 0e404c074975..da00d6b3b6a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -220,6 +220,13 @@ static int umc_v12_0_convert_error_address(struct amdgpu_device *adev, nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); /* other nps modes are taken as nps1 */ + if (nps == AMDGPU_NPS2_PARTITION_MODE) { + loop_bits[0] = UMC_V12_0_PA_CH5_BIT; + loop_bits[1] = UMC_V12_0_PA_C2_BIT; + loop_bits[2] = UMC_V12_0_PA_B1_BIT; + loop_bits[3] = UMC_V12_0_PA_R12_BIT; + } + if (nps == AMDGPU_NPS4_PARTITION_MODE) { loop_bits[0] = UMC_V12_0_PA_CH4_BIT; loop_bits[1] = UMC_V12_0_PA_CH5_BIT; @@ -517,6 +524,9 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, if (adev->gmc.gmc_funcs->query_mem_partition_mode) nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + + if (nps == AMDGPU_NPS2_PARTITION_MODE) + shift_bit = UMC_V12_0_PA_B1_BIT; if (nps == AMDGPU_NPS4_PARTITION_MODE) shift_bit = UMC_V12_0_PA_B0_BIT; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index 9298018d938f..056bbc038312 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -65,12 +65,14 @@ /* row bits in SOC physical address */ #define UMC_V12_0_PA_R0_BIT 22 #define UMC_V12_0_PA_R11_BIT 33 +#define UMC_V12_0_PA_R12_BIT 34 #define UMC_V12_0_PA_R13_BIT 35 /* channel bit in SOC physical address */ #define UMC_V12_0_PA_CH4_BIT 12 #define UMC_V12_0_PA_CH5_BIT 13 /* bank bit in SOC physical address */ #define UMC_V12_0_PA_B0_BIT 19 +#define UMC_V12_0_PA_B1_BIT 20 /* row bits in MCA address */ #define UMC_V12_0_MA_R0_BIT 10 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 8e7a36f26e9c..b8d835c9e17e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -39,6 +39,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x1fd #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x503 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index d716510b8dd6..3eec1b8feaee 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -39,6 +39,7 @@ #define 
VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 22ae1939476f..0b19f0ab4480 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -40,6 +40,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index c6f6392c1c20..8fff470bce87 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -46,6 +46,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define VCN_HARVEST_MMSCH 0 @@ -238,9 +239,9 @@ static int vcn_v4_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_pause_dpg_mode; } - /* TODO: Add queue reset mask when FW fully supports it */ adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); @@ -614,7 +615,8 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, /* VCN global tiling registers */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( - VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG), + adev->gfx.config.gb_addr_config, 0, indirect); } /** @@ -1945,6 +1947,20 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, return 0; } +static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + + if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + + vcn_v4_0_stop(vinst); + vcn_v4_0_start(vinst); + + return amdgpu_ring_test_helper(ring); +} + static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1974,6 +1990,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = vcn_v4_0_ring_reset, }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 3e176b4b7c69..712e1fba33ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -45,6 +45,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = { SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), @@ -287,6 +288,31 @@ static int vcn_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block) return 0; } +static int vcn_v4_0_3_hw_init_inst(struct amdgpu_vcn_inst *vinst) +{ + int vcn_inst; + struct amdgpu_device *adev = vinst->adev; + struct amdgpu_ring *ring; + int inst_idx = vinst->inst; + + vcn_inst = GET_INST(VCN, inst_idx); + ring = 
&adev->vcn.inst[inst_idx].ring_enc[0]; + if (ring->use_doorbell) { + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * vcn_inst, + adev->vcn.inst[inst_idx].aid_id); + + WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + /* Read DB_CTRL to flush the write DB_CTRL command. */ + RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL); + } + + return 0; +} + /** * vcn_v4_0_3_hw_init - start and test VCN block * @@ -298,7 +324,8 @@ static int vcn_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; - int i, r, vcn_inst; + struct amdgpu_vcn_inst *vinst; + int i, r; if (amdgpu_sriov_vf(adev)) { r = vcn_v4_0_3_start_sriov(adev); @@ -321,28 +348,9 @@ static int vcn_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { struct amdgpu_vcn4_fw_shared *fw_shared; - vcn_inst = GET_INST(VCN, i); ring = &adev->vcn.inst[i].ring_enc[0]; - - if (ring->use_doorbell) { - adev->nbio.funcs->vcn_doorbell_range( - adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 9 * vcn_inst, - adev->vcn.inst[i].aid_id); - - WREG32_SOC15( - VCN, GET_INST(VCN, ring->me), - regVCN_RB1_DB_CTRL, - ring->doorbell_index - << VCN_RB1_DB_CTRL__OFFSET__SHIFT | - VCN_RB1_DB_CTRL__EN_MASK); - - /* Read DB_CTRL to flush the write DB_CTRL command. */ - RREG32_SOC15( - VCN, GET_INST(VCN, ring->me), - regVCN_RB1_DB_CTRL); - } + vinst = &adev->vcn.inst[i]; + vcn_v4_0_3_hw_init_inst(vinst); /* Re-init fw_shared when RAS fatal error occurred */ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; @@ -1563,6 +1571,37 @@ static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + int r = 0; + int vcn_inst; + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + + if (amdgpu_sriov_vf(ring->adev)) + return -EOPNOTSUPP; + + if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + + vcn_inst = GET_INST(VCN, ring->me); + r = amdgpu_dpm_reset_vcn(adev, 1 << vcn_inst); + + if (r) { + DRM_DEV_ERROR(adev->dev, "VCN reset fail : %d\n", r); + return r; + } + + /* This flag is not set for VF, assumed to be disabled always */ + if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100) + adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED); + vcn_v4_0_3_hw_init_inst(vinst); + vcn_v4_0_3_start_dpg_mode(vinst, adev->vcn.inst[ring->me].indirect_sram); + r = amdgpu_ring_test_helper(ring); + + return r; +} + static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1591,6 +1630,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg, .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = vcn_v4_0_3_ring_reset, }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index ba603b2246e2..ed00d35039c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -46,6 +46,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 (0x48300 + 0x38000) +#define VCN1_AON_SOC_ADDRESS_3_0 (0x48000 + 
0x38000) #define VCN_HARVEST_MMSCH 0 @@ -207,6 +208,10 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) if (amdgpu_sriov_vf(adev)) fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT; + fw_shared->drm_key_wa.method = + AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING; + if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); @@ -214,6 +219,13 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_5_pause_dpg_mode; } + adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + + r = amdgpu_vcn_sysfs_reset_mask_init(adev); + if (r) + return r; + if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); if (r) @@ -1435,6 +1447,20 @@ static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + + if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + + vcn_v4_0_5_stop(vinst); + vcn_v4_0_5_start(vinst); + + return amdgpu_ring_test_helper(ring); +} + static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1462,6 +1488,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = vcn_v4_0_5_ring_reset, }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index d99d05f42f1d..27dcc6f37a73 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -196,9 +196,9 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.inst[i].pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; } - /* TODO: Add queue reset mask when FW fully supports it */ adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; vcn_v5_0_0_alloc_ip_dump(adev); @@ -533,7 +533,8 @@ static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, /* VCN global tiling registers */ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( - VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG), + adev->gfx.config.gb_addr_config, 0, indirect); return; } @@ -1171,6 +1172,20 @@ static void vcn_v5_0_0_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + + if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + + vcn_v5_0_0_stop(vinst); + vcn_v5_0_0_start(vinst); + + return amdgpu_ring_test_helper(ring); +} + static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1198,6 +1213,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = { .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = 
vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = vcn_v5_0_0_ring_reset, }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index e0e84ef7f568..60ee6e02e6ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -126,7 +126,7 @@ static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) ring = &adev->vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * vcn_inst; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 11 * vcn_inst; ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id); @@ -213,7 +213,7 @@ static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) if (ring->use_doorbell) adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 9 * vcn_inst), + 11 * vcn_inst), adev->vcn.inst[i].aid_id); /* Re-init fw_shared, if required */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index faa0dd75dd6d..85846fd08ce4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -350,6 +350,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) if (ret) return ret; } + ih[i]->overflow = false; } if (!amdgpu_sriov_vf(adev)) @@ -437,7 +438,10 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) goto out; - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + if (!amdgpu_sriov_vf(adev)) + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + else + ih->overflow = true; /* When a ring buffer overflow happen start parsing interrupt * from the last not overwritten vector (wptr + 32). 
Hopefully diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c index 4a5a0a4e00f2..9bde2c64540f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c @@ -27,6 +27,16 @@ #include "kfd_priv.h" static struct dentry *debugfs_root; +static struct dentry *debugfs_proc; +static struct list_head procs; + +struct debugfs_proc_entry { + struct list_head list; + struct dentry *proc_dentry; + pid_t pid; +}; + +#define MAX_DEBUGFS_FILENAME_LEN 32 static int kfd_debugfs_open(struct inode *inode, struct file *file) { @@ -92,6 +102,8 @@ static const struct file_operations kfd_debugfs_hang_hws_fops = { void kfd_debugfs_init(void) { debugfs_root = debugfs_create_dir("kfd", NULL); + debugfs_proc = debugfs_create_dir("proc", debugfs_root); + INIT_LIST_HEAD(&procs); debugfs_create_file("mqds", S_IFREG | 0444, debugfs_root, kfd_debugfs_mqds_by_process, &kfd_debugfs_fops); @@ -107,5 +119,69 @@ void kfd_debugfs_init(void) void kfd_debugfs_fini(void) { + debugfs_remove_recursive(debugfs_proc); debugfs_remove_recursive(debugfs_root); } + +static ssize_t kfd_debugfs_pasid_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct kfd_process_device *pdd = file_inode(file)->i_private; + char tmp[32]; + int len; + + len = snprintf(tmp, sizeof(tmp), "%u\n", pdd->pasid); + + return simple_read_from_buffer(buf, count, ppos, tmp, len); +} + +static const struct file_operations kfd_debugfs_pasid_fops = { + .owner = THIS_MODULE, + .read = kfd_debugfs_pasid_read, +}; + +void kfd_debugfs_add_process(struct kfd_process *p) +{ + int i; + char name[MAX_DEBUGFS_FILENAME_LEN]; + struct debugfs_proc_entry *entry; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return; + + list_add(&entry->list, &procs); + entry->pid = p->lead_thread->pid; + snprintf(name, MAX_DEBUGFS_FILENAME_LEN, "%d", + (int)entry->pid); + entry->proc_dentry = debugfs_create_dir(name, debugfs_proc); + + /* Create debugfs files for each GPU: + * - proc/<pid>/pasid_<gpuid> + */ + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; + + snprintf(name, MAX_DEBUGFS_FILENAME_LEN, "pasid_%u", + pdd->dev->id); + debugfs_create_file((const char *)name, S_IFREG | 0444, + entry->proc_dentry, pdd, + &kfd_debugfs_pasid_fops); + } +} + +void kfd_debugfs_remove_process(struct kfd_process *p) +{ + struct debugfs_proc_entry *entry, *next; + + mutex_lock(&kfd_processes_mutex); + list_for_each_entry_safe(entry, next, &procs, list) { + if (entry->pid != p->lead_thread->pid) + continue; + + debugfs_remove_recursive(entry->proc_dentry); + list_del(&entry->list); + kfree(entry); + } + mutex_unlock(&kfd_processes_mutex); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index b9c82be6ce13..bf0854bd5555 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -352,11 +352,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) f2g = &aldebaran_kfd2kgd; break; case IP_VERSION(9, 4, 3): - gfx_target_version = adev->rev_id >= 1 ? 90402 - : adev->flags & AMD_IS_APU ? 
90400 - : 90401; - f2g = &gc_9_4_3_kfd2kgd; - break; case IP_VERSION(9, 4, 4): gfx_target_version = 90402; f2g = &gc_9_4_3_kfd2kgd; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index c610e172a2b8..76359c6a3f3a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1576,8 +1576,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, int bit; if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) { - dev_err(dev, "No more SDMA queue to allocate\n"); + if (bitmap_empty(dqm->sdma_bitmap, get_num_sdma_queues(dqm))) { + dev_warn(dev, "No more SDMA queue to allocate (%d total queues)\n", + get_num_sdma_queues(dqm)); return -ENOMEM; } @@ -1602,8 +1603,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, q->properties.sdma_queue_id = q->sdma_id / kfd_get_num_sdma_engines(dqm->dev); } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { - if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) { - dev_err(dev, "No more XGMI SDMA queue to allocate\n"); + if (bitmap_empty(dqm->xgmi_sdma_bitmap, get_num_xgmi_sdma_queues(dqm))) { + dev_warn(dev, "No more XGMI SDMA queue to allocate (%d total queues)\n", + get_num_xgmi_sdma_queues(dqm)); return -ENOMEM; } if (restore_sdma_id) { @@ -1662,8 +1664,8 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, } if (!free_bit_found) { - dev_err(dev, "No more SDMA queue to allocate for target ID %i\n", - q->properties.sdma_engine_id); + dev_warn(dev, "No more SDMA queue to allocate for target ID %i (%d total queues)\n", + q->properties.sdma_engine_id, num_queues); return -ENOMEM; } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index fecdb6794075..e54e708ed82d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -1177,6 +1177,25 @@ void kfd_signal_hw_exception_event(u32 pasid) kfd_unref_process(p); } +void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va) +{ + struct kfd_process_device *pdd; + struct kfd_hsa_memory_exception_data exception_data; + int i; + + memset(&exception_data, 0, sizeof(exception_data)); + exception_data.va = gpu_va; + exception_data.failure.NotPresent = 1; + + // Send VM seg fault to all kfd process device + for (i = 0; i < p->n_pdds; i++) { + pdd = p->pdds[i]; + exception_data.gpu_id = pdd->user_gpu_id; + kfd_evict_process_device(pdd); + kfd_signal_vm_fault_event(pdd, NULL, &exception_data); + } +} + void kfd_signal_vm_fault_event(struct kfd_process_device *pdd, struct kfd_vm_fault_info *info, struct kfd_hsa_memory_exception_data *data) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index 37b69fe0ede3..54870b4c5000 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -168,11 +168,12 @@ static bool event_interrupt_isr_v10(struct kfd_node *dev, client_id != SOC15_IH_CLIENTID_SE3SH) return false; - pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n", - client_id, source_id, vmid, pasid); - pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", - data[0], data[1], data[2], data[3], - data[4], data[5], data[6], data[7]); + dev_dbg(dev->adev->dev, + "client id 0x%x, source id %d, vmid %d, pasid 0x%x. 
raw data:\n", + client_id, source_id, vmid, pasid); + dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", + data[0], data[1], data[2], data[3], data[4], data[5], data[6], + data[7]); /* If there is no valid PASID, it's likely a bug */ if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt")) @@ -217,37 +218,66 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING); switch (encoding) { case SQ_INTERRUPT_WORD_ENCODING_AUTO: - pr_debug_ratelimited( + dev_dbg_ratelimited( + dev->adev->dev, "sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf0_full %d, ttrac_buf1_full %d, ttrace_utc_err %d\n", - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_AUTO_CTXID1, - SE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, - THREAD_TRACE), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, - WLT), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, - THREAD_TRACE_BUF0_FULL), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, - THREAD_TRACE_BUF1_FULL), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, - THREAD_TRACE_UTC_ERROR)); + REG_GET_FIELD( + context_id1, + SQ_INTERRUPT_WORD_AUTO_CTXID1, + SE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID0, + WLT), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE_BUF0_FULL), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE_BUF1_FULL), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE_UTC_ERROR)); break; case SQ_INTERRUPT_WORD_ENCODING_INST: - pr_debug_ratelimited("sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, - SE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - DATA), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - SA_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - PRIV), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - WAVE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - SIMD_ID), - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, - WGP_ID)); + dev_dbg_ratelimited( + dev->adev->dev, + "sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", + REG_GET_FIELD( + context_id1, + SQ_INTERRUPT_WORD_WAVE_CTXID1, + SE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + DATA), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + SA_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + PRIV), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + WAVE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + SIMD_ID), + REG_GET_FIELD( + context_id1, + SQ_INTERRUPT_WORD_WAVE_CTXID1, + WGP_ID)); if (context_id0 & SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV_MASK) { if (kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_DEBUG_DOORBELL_ID(context_id0), @@ -259,21 +289,37 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, case SQ_INTERRUPT_WORD_ENCODING_ERROR: sq_intr_err_type = REG_GET_FIELD(context_id0, KFD_CTXID0, ERR_TYPE); - pr_warn_ratelimited("sq_intr: error, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n", - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, - SE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - DATA), - 
REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - SA_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - PRIV), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - WAVE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - SIMD_ID), - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, - WGP_ID), + dev_warn_ratelimited( + dev->adev->dev, + "sq_intr: error, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n", + REG_GET_FIELD( + context_id1, + SQ_INTERRUPT_WORD_WAVE_CTXID1, + SE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + DATA), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + SA_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + PRIV), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + WAVE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + SIMD_ID), + REG_GET_FIELD( + context_id1, + SQ_INTERRUPT_WORD_WAVE_CTXID1, + WGP_ID), sq_intr_err_type); break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c index c5f97e6e36ff..eb88ba8d8b01 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -148,44 +148,69 @@ enum SQ_INTERRUPT_ERROR_TYPE { #define KFD_CTXID0_DOORBELL_ID(ctxid0) ((ctxid0) & \ KFD_CTXID0_DOORBELL_ID_MASK) -static void print_sq_intr_info_auto(uint32_t context_id0, uint32_t context_id1) +static void print_sq_intr_info_auto(struct kfd_node *dev, uint32_t context_id0, + uint32_t context_id1) { - pr_debug_ratelimited( + dev_dbg_ratelimited( + dev->adev->dev, "sq_intr: auto, ttrace %d, wlt %d, ttrace_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n", - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, WLT), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_BUF_FULL), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, REG_TIMESTAMP), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, CMD_TIMESTAMP), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_CMD_OVERFLOW), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_REG_OVERFLOW), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, IMMED_OVERFLOW), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_UTC_ERROR)); + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE_BUF_FULL), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + REG_TIMESTAMP), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + CMD_TIMESTAMP), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + HOST_CMD_OVERFLOW), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + HOST_REG_OVERFLOW), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + IMMED_OVERFLOW), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE_UTC_ERROR)); } -static void print_sq_intr_info_inst(uint32_t context_id0, uint32_t context_id1) +static void print_sq_intr_info_inst(struct kfd_node *dev, uint32_t context_id0, + uint32_t context_id1) { - pr_debug_ratelimited( + dev_dbg_ratelimited( + dev->adev->dev, "sq_intr: inst, data 0x%08x, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", 
REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, DATA), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, SH_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, + SH_ID), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, WAVE_ID), - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, SIMD_ID), - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, WGP_ID)); + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, + WAVE_ID), + REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, + SIMD_ID), + REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, + WGP_ID)); } -static void print_sq_intr_info_error(uint32_t context_id0, uint32_t context_id1) +static void print_sq_intr_info_error(struct kfd_node *dev, uint32_t context_id0, + uint32_t context_id1) { - pr_warn_ratelimited( + dev_warn_ratelimited( + dev->adev->dev, "sq_intr: error, detail 0x%08x, type %d, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, DETAIL), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, SH_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, PRIV), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, WAVE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, SIMD_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, WGP_ID)); + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, + DETAIL), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, + TYPE), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, + SH_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, + PRIV), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, + WAVE_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, + SIMD_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, + WGP_ID)); } static void event_interrupt_poison_consumption_v11(struct kfd_node *dev, @@ -255,11 +280,12 @@ static bool event_interrupt_isr_v11(struct kfd_node *dev, (context_id0 & AMDGPU_FENCE_MES_QUEUE_FLAG)) return false; - pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n", - client_id, source_id, vmid, pasid); - pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", - data[0], data[1], data[2], data[3], - data[4], data[5], data[6], data[7]); + dev_dbg(dev->adev->dev, + "client id 0x%x, source id %d, vmid %d, pasid 0x%x. 
raw data:\n", + client_id, source_id, vmid, pasid); + dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", + data[0], data[1], data[2], data[3], data[4], data[5], data[6], + data[7]); /* If there is no valid PASID, it's likely a bug */ if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt")) @@ -353,10 +379,10 @@ static void event_interrupt_wq_v11(struct kfd_node *dev, SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING); switch (sq_int_enc) { case SQ_INTERRUPT_WORD_ENCODING_AUTO: - print_sq_intr_info_auto(context_id0, context_id1); + print_sq_intr_info_auto(dev, context_id0, context_id1); break; case SQ_INTERRUPT_WORD_ENCODING_INST: - print_sq_intr_info_inst(context_id0, context_id1); + print_sq_intr_info_inst(dev, context_id0, context_id1); sq_int_priv = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV); if (sq_int_priv && (kfd_set_dbg_ev_from_interrupt(dev, pasid, @@ -366,7 +392,7 @@ static void event_interrupt_wq_v11(struct kfd_node *dev, return; break; case SQ_INTERRUPT_WORD_ENCODING_ERROR: - print_sq_intr_info_error(context_id0, context_id1); + print_sq_intr_info_error(dev, context_id0, context_id1); sq_int_errtype = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE); if (sq_int_errtype != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index b8a91bf4ef30..4ceb251312a6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -314,11 +314,12 @@ static bool event_interrupt_isr_v9(struct kfd_node *dev, & ~pasid_mask) | pasid); } - pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n", - client_id, source_id, vmid, pasid); - pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", - data[0], data[1], data[2], data[3], - data[4], data[5], data[6], data[7]); + dev_dbg(dev->adev->dev, + "client id 0x%x, source id %d, vmid %d, pasid 0x%x. 
raw data:\n", + client_id, source_id, vmid, pasid); + dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", + data[0], data[1], data[2], data[3], data[4], data[5], data[6], + data[7]); /* If there is no valid PASID, it's likely a bug */ if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt")) @@ -379,28 +380,82 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, encoding = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, ENCODING); switch (encoding) { case SQ_INTERRUPT_WORD_ENCODING_AUTO: - pr_debug_ratelimited( + dev_dbg_ratelimited( + dev->adev->dev, "sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n", - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, SE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, WLT), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_BUF_FULL), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, REG_TIMESTAMP), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, CMD_TIMESTAMP), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_CMD_OVERFLOW), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_REG_OVERFLOW), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, IMMED_OVERFLOW), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_UTC_ERROR)); + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + SE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + THREAD_TRACE), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + WLT), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + THREAD_TRACE_BUF_FULL), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + REG_TIMESTAMP), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + CMD_TIMESTAMP), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + HOST_CMD_OVERFLOW), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + HOST_REG_OVERFLOW), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + IMMED_OVERFLOW), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + THREAD_TRACE_UTC_ERROR)); break; case SQ_INTERRUPT_WORD_ENCODING_INST: - pr_debug_ratelimited("sq_intr: inst, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, intr_data 0x%x\n", - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID), + dev_dbg_ratelimited( + dev->adev->dev, + "sq_intr: inst, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, intr_data 0x%x\n", + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + SE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + DATA), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + SH_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + PRIV), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + WAVE_ID), + REG_GET_FIELD( + context_id0, + 
SQ_INTERRUPT_WORD_WAVE_CTXID, + SIMD_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + CU_ID), sq_int_data); if (context_id0 & SQ_INTERRUPT_WORD_WAVE_CTXID__PRIV_MASK) { if (kfd_set_dbg_ev_from_interrupt(dev, pasid, @@ -412,14 +467,37 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, break; case SQ_INTERRUPT_WORD_ENCODING_ERROR: sq_intr_err = REG_GET_FIELD(sq_int_data, KFD_SQ_INT_DATA, ERR_TYPE); - pr_warn_ratelimited("sq_intr: error, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, err_type %d\n", - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID), + dev_warn_ratelimited( + dev->adev->dev, + "sq_intr: error, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, err_type %d\n", + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + SE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + DATA), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + SH_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + PRIV), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + WAVE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + SIMD_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + CU_ID), sq_intr_err); if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 80320a6c8854..97933d2a3803 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -495,6 +495,10 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd, m->sdma_engine_id = q->sdma_engine_id; m->sdma_queue_id = q->sdma_queue_id; m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT; + /* Allow context switch so we don't cross-process starve with a massive + * command buffer of long-running SDMA commands + */ + m->sdmax_rlcx_ib_cntl |= SDMA0_GFX_IB_CNTL__SWITCH_INSIDE_IB_MASK; q->is_active = QUEUE_IS_ACTIVE(*q); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 2893fd5e5d00..fa28c57692b8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -43,7 +43,7 @@ static int pm_map_process_v9(struct packet_manager *pm, memset(buffer, 0, sizeof(struct pm4_mes_map_process)); packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process)); - if (adev->enforce_isolation[kfd->node_id]) + if (adev->enforce_isolation[kfd->node_id] == AMDGPU_ENFORCE_ISOLATION_ENABLE) packet->bitfields2.exec_cleaner_shader = 1; packet->bitfields2.diq_enable = (qpd->is_debug) ? 
1 : 0; packet->bitfields2.process_quantum = 10; @@ -102,7 +102,8 @@ static int pm_map_process_aldebaran(struct packet_manager *pm, memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran)); packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process_aldebaran)); - if (adev->enforce_isolation[knode->node_id]) + if (adev->enforce_isolation[knode->node_id] == + AMDGPU_ENFORCE_ISOLATION_ENABLE) packet->bitfields2.exec_cleaner_shader = 1; packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; packet->bitfields2.process_quantum = 10; @@ -165,9 +166,9 @@ static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, * hws_max_conc_proc has been done in * kgd2kfd_device_init(). */ - concurrent_proc_cnt = adev->enforce_isolation[kfd->node_id] ? - 1 : min(pm->dqm->processes_count, - kfd->max_proc_per_quantum); + concurrent_proc_cnt = (adev->enforce_isolation[kfd->node_id] == + AMDGPU_ENFORCE_ISOLATION_ENABLE) ? + 1 : min(pm->dqm->processes_count, kfd->max_proc_per_quantum); packet = (struct pm4_mes_runlist *)buffer; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index f6aedf69c644..d221c58dccc3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1507,6 +1507,8 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, int kfd_get_num_events(struct kfd_process *p); int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); +void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va); + void kfd_signal_vm_fault_event(struct kfd_process_device *pdd, struct kfd_vm_fault_info *info, struct kfd_hsa_memory_exception_data *data); @@ -1581,10 +1583,15 @@ int kfd_debugfs_hang_hws(struct kfd_node *dev); int pm_debugfs_hang_hws(struct packet_manager *pm); int dqm_debugfs_hang_hws(struct device_queue_manager *dqm); +void kfd_debugfs_add_process(struct kfd_process *p); +void kfd_debugfs_remove_process(struct kfd_process *p); + #else static inline void kfd_debugfs_init(void) {} static inline void kfd_debugfs_fini(void) {} +static inline void kfd_debugfs_add_process(struct kfd_process *p) {} +static inline void kfd_debugfs_remove_process(struct kfd_process *p) {} #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 7c0c24732481..722ac1662bdc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -900,6 +900,8 @@ struct kfd_process *kfd_create_process(struct task_struct *thread) kfd_procfs_add_sysfs_files(process); kfd_procfs_add_sysfs_counters(process); + kfd_debugfs_add_process(process); + init_waitqueue_head(&process->wait_irq_drain); } out: @@ -1054,6 +1056,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *pdd = p->pdds[i]; + kfd_smi_event_process(pdd, false); + pr_debug("Releasing pdd (topology id %d, for pid %d)\n", pdd->dev->id, p->lead_thread->pid); kfd_process_device_destroy_cwsr_dgpu(pdd); @@ -1174,6 +1178,7 @@ static void kfd_process_wq_release(struct work_struct *work) dma_fence_signal(ef); kfd_process_remove_sysfs(p); + kfd_debugfs_remove_process(p); kfd_process_kunmap_signal_bo(p); kfd_process_free_outstanding_kfd_bos(p); @@ -1715,6 +1720,8 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, pdd->pasid = avm->pasid; pdd->drm_file = drm_file; + kfd_smi_event_process(pdd, true); + return 0; err_get_pasid: diff --git 
a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 7eb370b68159..6d5fa57d4a23 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -451,8 +451,15 @@ int pqm_create_queue(struct process_queue_manager *pqm, } if (retval != 0) { - pr_err("process pid %d DQM create queue type %d failed. ret %d\n", - pqm->process->lead_thread->pid, type, retval); + if ((type == KFD_QUEUE_TYPE_SDMA || + type == KFD_QUEUE_TYPE_SDMA_XGMI || + type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) && + retval == -ENOMEM) + pr_warn("process pid %d DQM create queue type %d failed. ret %d\n", + pqm->process->lead_thread->pid, type, retval); + else + pr_err("process pid %d DQM create queue type %d failed. ret %d\n", + pqm->process->lead_thread->pid, type, retval); goto err_create_queue; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 4afff7094caf..a65c67cf56ff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -402,7 +402,7 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv) { u32 vgpr_size = 0x40000; - if ((gfxv / 100 * 100) == 90400 || /* GFX_VERSION_AQUA_VANJARAM */ + if (gfxv == 90402 || /* GFX_VERSION_AQUA_VANJARAM */ gfxv == 90010 || /* GFX_VERSION_ALDEBARAN */ gfxv == 90008 || /* GFX_VERSION_ARCTURUS */ gfxv == 90500) @@ -462,7 +462,7 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev) if (gfxv == 80002) /* GFX_VERSION_TONGA */ props->eop_buffer_size = 0x8000; - else if ((gfxv / 100 * 100) == 90400) /* GFX_VERSION_AQUA_VANJARAM */ + else if (gfxv == 90402) /* GFX_VERSION_AQUA_VANJARAM */ props->eop_buffer_size = 4096; else if (gfxv >= 80000) props->eop_buffer_size = 4096; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index 9b8169761ec5..83d9384ac815 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -163,10 +163,9 @@ static int kfd_smi_ev_release(struct inode *inode, struct file *filep) static bool kfd_smi_ev_enabled(pid_t pid, struct kfd_smi_client *client, unsigned int event) { - uint64_t all = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS); uint64_t events = READ_ONCE(client->events); - if (pid && client->pid != pid && !(client->suser && (events & all))) + if (pid && client->pid != pid && !client->suser) return false; return events & KFD_SMI_EVENT_MASK_FROM_INDEX(event); @@ -345,6 +344,27 @@ void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid, pid, address, last - address + 1, node->id, trigger)); } +void kfd_smi_event_process(struct kfd_process_device *pdd, bool start) +{ + struct amdgpu_task_info *task_info; + struct amdgpu_vm *avm; + + if (!pdd->drm_priv) + return; + + avm = drm_priv_to_vm(pdd->drm_priv); + task_info = amdgpu_vm_get_task_info_vm(avm); + + if (task_info) { + kfd_smi_event_add(0, pdd->dev, + start ? 
KFD_SMI_EVENT_PROCESS_START : + KFD_SMI_EVENT_PROCESS_END, + KFD_EVENT_FMT_PROCESS(task_info->pid, + task_info->task_name)); + amdgpu_vm_put_task_info(task_info); + } +} + int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd) { struct kfd_smi_client *client; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h index 503bff13d815..bb4d72b57387 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h @@ -53,4 +53,5 @@ void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm); void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid, unsigned long address, unsigned long last, uint32_t trigger); +void kfd_smi_event_process(struct kfd_process_device *pdd, bool start); #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 100717a98ec1..72be6e152e88 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1245,8 +1245,7 @@ svm_range_get_pte_flags(struct kfd_node *node, case IP_VERSION(9, 4, 4): case IP_VERSION(9, 5, 0): if (ext_coherent) - mtype_local = (gc_ip_version < IP_VERSION(9, 5, 0) && !node->adev->rev_id) ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_CC; + mtype_local = AMDGPU_VM_MTYPE_CC; else mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 9bbee484d57c..baa2374acdeb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1267,34 +1267,41 @@ static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev, { struct kfd_node *gpu = outbound_link->gpu; struct amdgpu_device *adev = gpu->adev; - int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; + unsigned int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; + unsigned int num_xgmi_sdma_engines = kfd_get_num_xgmi_sdma_engines(gpu); + unsigned int num_sdma_engines = kfd_get_num_sdma_engines(gpu); + uint32_t sdma_eng_id_mask = (1 << num_sdma_engines) - 1; + uint32_t xgmi_sdma_eng_id_mask = + ((1 << num_xgmi_sdma_engines) - 1) << num_sdma_engines; + bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu && adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 && - kfd_get_num_xgmi_sdma_engines(gpu) >= 14 && - (!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8); + num_xgmi_sdma_engines >= 6 && (!(adev->flags & AMD_IS_APU) && + num_xgmi_nodes == 8); if (support_rec_eng) { int src_socket_id = adev->gmc.xgmi.physical_node_id; int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id; + unsigned int reshift = num_xgmi_sdma_engines == 6 ? 
1 : 0; outbound_link->rec_sdma_eng_id_mask = - 1 << rec_sdma_eng_map[src_socket_id][dst_socket_id]; + 1 << (rec_sdma_eng_map[src_socket_id][dst_socket_id] >> reshift); inbound_link->rec_sdma_eng_id_mask = - 1 << rec_sdma_eng_map[dst_socket_id][src_socket_id]; - } else { - int num_sdma_eng = kfd_get_num_sdma_engines(gpu); - int i, eng_offset = 0; + 1 << (rec_sdma_eng_map[dst_socket_id][src_socket_id] >> reshift); - if (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && - kfd_get_num_xgmi_sdma_engines(gpu) && to_dev->gpu) { - eng_offset = num_sdma_eng; - num_sdma_eng = kfd_get_num_xgmi_sdma_engines(gpu); - } + /* If recommended engine is out of range, need to reset the mask */ + if (outbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask) + outbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask; + if (inbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask) + inbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask; - for (i = 0; i < num_sdma_eng; i++) { - outbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); - inbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); - } + } else { + uint32_t engine_mask = (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && + num_xgmi_sdma_engines && to_dev->gpu) ? xgmi_sdma_eng_id_mask : + sdma_eng_id_mask; + + outbound_link->rec_sdma_eng_id_mask = engine_mask; + inbound_link->rec_sdma_eng_id_mask = engine_mask; } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 536f73131c2d..8984e211dd1c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -115,6 +115,8 @@ #include "modules/inc/mod_freesync.h" #include "modules/power/power_helpers.h" +static_assert(AMDGPU_DMUB_NOTIFICATION_MAX == DMUB_NOTIFICATION_MAX, "AMDGPU_DMUB_NOTIFICATION_MAX mismatch"); + #define FIRMWARE_RENOIR_DMUB "amdgpu/renoir_dmcub.bin" MODULE_FIRMWARE(FIRMWARE_RENOIR_DMUB); #define FIRMWARE_SIENNA_CICHLID_DMUB "amdgpu/sienna_cichlid_dmcub.bin" @@ -280,7 +282,7 @@ static u32 dm_vblank_get_counter(struct amdgpu_device *adev, int crtc) acrtc = adev->mode_info.crtcs[crtc]; if (!acrtc->dm_irq_params.stream) { - DRM_ERROR("dc_stream_state is NULL for crtc '%d'!\n", + drm_err(adev_to_drm(adev), "dc_stream_state is NULL for crtc '%d'!\n", crtc); return 0; } @@ -301,7 +303,7 @@ static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc, acrtc = adev->mode_info.crtcs[crtc]; if (!acrtc->dm_irq_params.stream) { - DRM_ERROR("dc_stream_state is NULL for crtc '%d'!\n", + drm_err(adev_to_drm(adev), "dc_stream_state is NULL for crtc '%d'!\n", crtc); return 0; } @@ -673,15 +675,21 @@ static void dm_crtc_high_irq(void *interrupt_params) spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); if (acrtc->dm_irq_params.stream && - acrtc->dm_irq_params.vrr_params.supported && - acrtc->dm_irq_params.freesync_config.state == - VRR_STATE_ACTIVE_VARIABLE) { + acrtc->dm_irq_params.vrr_params.supported) { + bool replay_en = acrtc->dm_irq_params.stream->link->replay_settings.replay_feature_enabled; + bool psr_en = acrtc->dm_irq_params.stream->link->psr_settings.psr_feature_enabled; + bool fs_active_var_en = acrtc->dm_irq_params.freesync_config.state == VRR_STATE_ACTIVE_VARIABLE; + mod_freesync_handle_v_update(adev->dm.freesync_module, acrtc->dm_irq_params.stream, &acrtc->dm_irq_params.vrr_params); - dc_stream_adjust_vmin_vmax(adev->dm.dc, acrtc->dm_irq_params.stream, - &acrtc->dm_irq_params.vrr_params.adjust); + /* update vmin_vmax only if 
freesync is enabled, or only if PSR and REPLAY are disabled */ + if (fs_active_var_en || (!fs_active_var_en && !replay_en && !psr_en)) { + dc_stream_adjust_vmin_vmax(adev->dm.dc, + acrtc->dm_irq_params.stream, + &acrtc->dm_irq_params.vrr_params.adjust); + } } /* @@ -749,6 +757,29 @@ static void dmub_aux_setconfig_callback(struct amdgpu_device *adev, complete(&adev->dm.dmub_aux_transfer_done); } +static void dmub_aux_fused_io_callback(struct amdgpu_device *adev, + struct dmub_notification *notify) +{ + if (!adev || !notify) { + ASSERT(false); + return; + } + + const struct dmub_cmd_fused_request *req = ¬ify->fused_request; + const uint8_t ddc_line = req->u.aux.ddc_line; + + if (ddc_line >= ARRAY_SIZE(adev->dm.fused_io)) { + ASSERT(false); + return; + } + + struct fused_io_sync *sync = &adev->dm.fused_io[ddc_line]; + + static_assert(sizeof(*req) <= sizeof(sync->reply_data), "Size mismatch"); + memcpy(sync->reply_data, req, sizeof(*req)); + complete(&sync->replied); +} + /** * dmub_hpd_callback - DMUB HPD interrupt processing callback. * @adev: amdgpu_device pointer @@ -772,18 +803,18 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, return; if (notify == NULL) { - DRM_ERROR("DMUB HPD callback notification was NULL"); + drm_err(adev_to_drm(adev), "DMUB HPD callback notification was NULL"); return; } if (notify->link_index > adev->dm.dc->link_count) { - DRM_ERROR("DMUB HPD index (%u)is abnormal", notify->link_index); + drm_err(adev_to_drm(adev), "DMUB HPD index (%u)is abnormal", notify->link_index); return; } /* Skip DMUB HPD IRQ in suspend/resume. We will probe them later. */ if (notify->type == DMUB_NOTIFICATION_HPD && adev->in_suspend) { - DRM_INFO("Skip DMUB HPD IRQ callback in suspend/resume\n"); + drm_info(adev_to_drm(adev), "Skip DMUB HPD IRQ callback in suspend/resume\n"); return; } @@ -800,11 +831,11 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, aconnector = to_amdgpu_dm_connector(connector); if (link && aconnector->dc_link == link) { if (notify->type == DMUB_NOTIFICATION_HPD) - DRM_INFO("DMUB HPD IRQ callback: link_index=%u\n", link_index); + drm_info(adev_to_drm(adev), "DMUB HPD IRQ callback: link_index=%u\n", link_index); else if (notify->type == DMUB_NOTIFICATION_HPD_IRQ) - DRM_INFO("DMUB HPD RX IRQ callback: link_index=%u\n", link_index); + drm_info(adev_to_drm(adev), "DMUB HPD RX IRQ callback: link_index=%u\n", link_index); else - DRM_WARN("DMUB Unknown HPD callback type %d, link_index=%u\n", + drm_warn(adev_to_drm(adev), "DMUB Unknown HPD callback type %d, link_index=%u\n", notify->type, link_index); hpd_aconnector = aconnector; @@ -816,7 +847,7 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, if (hpd_aconnector) { if (notify->type == DMUB_NOTIFICATION_HPD) { if (hpd_aconnector->dc_link->hpd_status == (notify->hpd_status == DP_HPD_PLUG)) - DRM_WARN("DMUB reported hpd status unchanged. link_index=%u\n", link_index); + drm_warn(adev_to_drm(adev), "DMUB reported hpd status unchanged. 
link_index=%u\n", link_index); handle_hpd_irq_helper(hpd_aconnector); } else if (notify->type == DMUB_NOTIFICATION_HPD_IRQ) { handle_hpd_rx_irq(hpd_aconnector); @@ -871,7 +902,7 @@ static void dm_handle_hpd_work(struct work_struct *work) dmub_hpd_wrk = container_of(work, struct dmub_hpd_work, handle_hpd_work); if (!dmub_hpd_wrk->dmub_notify) { - DRM_ERROR("dmub_hpd_wrk dmub_notify is NULL"); + drm_err(adev_to_drm(dmub_hpd_wrk->adev), "dmub_hpd_wrk dmub_notify is NULL"); return; } @@ -885,6 +916,30 @@ static void dm_handle_hpd_work(struct work_struct *work) } +static const char *dmub_notification_type_str(enum dmub_notification_type e) +{ + switch (e) { + case DMUB_NOTIFICATION_NO_DATA: + return "NO_DATA"; + case DMUB_NOTIFICATION_AUX_REPLY: + return "AUX_REPLY"; + case DMUB_NOTIFICATION_HPD: + return "HPD"; + case DMUB_NOTIFICATION_HPD_IRQ: + return "HPD_IRQ"; + case DMUB_NOTIFICATION_SET_CONFIG_REPLY: + return "SET_CONFIG_REPLY"; + case DMUB_NOTIFICATION_DPIA_NOTIFICATION: + return "DPIA_NOTIFICATION"; + case DMUB_NOTIFICATION_HPD_SENSE_NOTIFY: + return "HPD_SENSE_NOTIFY"; + case DMUB_NOTIFICATION_FUSED_IO: + return "FUSED_IO"; + default: + return "<unknown>"; + } +} + #define DMUB_TRACE_MAX_READ 64 /** * dm_dmub_outbox1_low_irq() - Handles Outbox interrupt @@ -902,15 +957,6 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) struct dmcub_trace_buf_entry entry = { 0 }; u32 count = 0; struct dmub_hpd_work *dmub_hpd_wrk; - static const char *const event_type[] = { - "NO_DATA", - "AUX_REPLY", - "HPD", - "HPD_IRQ", - "SET_CONFIGC_REPLY", - "DPIA_NOTIFICATION", - "HPD_SENSE_NOTIFY", - }; do { if (dc_dmub_srv_get_dmub_outbox0_msg(dm->dc, &entry)) { @@ -935,25 +981,25 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) do { dc_stat_get_dmub_notification(adev->dm.dc, ¬ify); if (notify.type >= ARRAY_SIZE(dm->dmub_thread_offload)) { - DRM_ERROR("DM: notify type %d invalid!", notify.type); + drm_err(adev_to_drm(adev), "DM: notify type %d invalid!", notify.type); continue; } if (!dm->dmub_callback[notify.type]) { - DRM_WARN("DMUB notification skipped due to no handler: type=%s\n", - event_type[notify.type]); + drm_warn(adev_to_drm(adev), "DMUB notification skipped due to no handler: type=%s\n", + dmub_notification_type_str(notify.type)); continue; } if (dm->dmub_thread_offload[notify.type] == true) { dmub_hpd_wrk = kzalloc(sizeof(*dmub_hpd_wrk), GFP_ATOMIC); if (!dmub_hpd_wrk) { - DRM_ERROR("Failed to allocate dmub_hpd_wrk"); + drm_err(adev_to_drm(adev), "Failed to allocate dmub_hpd_wrk"); return; } dmub_hpd_wrk->dmub_notify = kmemdup(¬ify, sizeof(struct dmub_notification), GFP_ATOMIC); if (!dmub_hpd_wrk->dmub_notify) { kfree(dmub_hpd_wrk); - DRM_ERROR("Failed to allocate dmub_hpd_wrk->dmub_notify"); + drm_err(adev_to_drm(adev), "Failed to allocate dmub_hpd_wrk->dmub_notify"); return; } INIT_WORK(&dmub_hpd_wrk->handle_hpd_work, dm_handle_hpd_work); @@ -1011,10 +1057,10 @@ static void amdgpu_dm_fbc_init(struct drm_connector *connector) &compressor->gpu_addr, &compressor->cpu_addr); if (r) - DRM_ERROR("DM: Failed to initialize FBC\n"); + drm_err(adev_to_drm(adev), "DM: Failed to initialize FBC\n"); else { adev->dm.dc->ctx->fbc_gpu_addr = compressor->gpu_addr; - DRM_INFO("DM: FBC alloc %lu\n", max_size*4); + drm_info(adev_to_drm(adev), "DM: FBC alloc %lu\n", max_size*4); } } @@ -1179,13 +1225,13 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) return 0; if (!fb_info) { - DRM_ERROR("No framebuffer info for DMUB service.\n"); + drm_err(adev_to_drm(adev), "No framebuffer info 
for DMUB service.\n"); return -EINVAL; } if (!dmub_fw) { /* Firmware required for DMUB support. */ - DRM_ERROR("No firmware provided for DMUB.\n"); + drm_err(adev_to_drm(adev), "No firmware provided for DMUB.\n"); return -EINVAL; } @@ -1195,19 +1241,19 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) status = dmub_srv_has_hw_support(dmub_srv, &has_hw_support); if (status != DMUB_STATUS_OK) { - DRM_ERROR("Error checking HW support for DMUB: %d\n", status); + drm_err(adev_to_drm(adev), "Error checking HW support for DMUB: %d\n", status); return -EINVAL; } if (!has_hw_support) { - DRM_INFO("DMUB unsupported on ASIC\n"); + drm_info(adev_to_drm(adev), "DMUB unsupported on ASIC\n"); return 0; } /* Reset DMCUB if it was previously running - before we overwrite its memory. */ status = dmub_srv_hw_reset(dmub_srv); if (status != DMUB_STATUS_OK) - DRM_WARN("Error resetting DMUB HW: %d\n", status); + drm_warn(adev_to_drm(adev), "Error resetting DMUB HW: %d\n", status); hdr = (const struct dmcub_firmware_header_v1_0 *)dmub_fw->data; @@ -1290,6 +1336,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) case IP_VERSION(3, 5, 1): case IP_VERSION(3, 6, 0): hw_params.ips_sequential_ono = adev->external_rev_id > 0x10; + hw_params.lower_hbr3_phy_ssc = true; break; default: break; @@ -1297,14 +1344,14 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) status = dmub_srv_hw_init(dmub_srv, &hw_params); if (status != DMUB_STATUS_OK) { - DRM_ERROR("Error initializing DMUB HW: %d\n", status); + drm_err(adev_to_drm(adev), "Error initializing DMUB HW: %d\n", status); return -EINVAL; } /* Wait for firmware load to finish. */ status = dmub_srv_wait_for_auto_load(dmub_srv, 100000); if (status != DMUB_STATUS_OK) - DRM_WARN("Wait for DMUB auto-load failed: %d\n", status); + drm_warn(adev_to_drm(adev), "Wait for DMUB auto-load failed: %d\n", status); /* Init DMCU and ABM if available. */ if (dmcu && abm) { @@ -1315,11 +1362,11 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) if (!adev->dm.dc->ctx->dmub_srv) adev->dm.dc->ctx->dmub_srv = dc_dmub_srv_create(adev->dm.dc, dmub_srv); if (!adev->dm.dc->ctx->dmub_srv) { - DRM_ERROR("Couldn't allocate DC DMUB server!\n"); + drm_err(adev_to_drm(adev), "Couldn't allocate DC DMUB server!\n"); return -ENOMEM; } - DRM_INFO("DMUB hardware initialized: version=0x%08X\n", + drm_info(adev_to_drm(adev), "DMUB hardware initialized: version=0x%08X\n", adev->dm.dmcub_fw_version); /* Keeping sanity checks off if @@ -1362,18 +1409,18 @@ static void dm_dmub_hw_resume(struct amdgpu_device *adev) status = dmub_srv_is_hw_init(dmub_srv, &init); if (status != DMUB_STATUS_OK) - DRM_WARN("DMUB hardware init check failed: %d\n", status); + drm_warn(adev_to_drm(adev), "DMUB hardware init check failed: %d\n", status); if (status == DMUB_STATUS_OK && init) { /* Wait for firmware load to finish. */ status = dmub_srv_wait_for_auto_load(dmub_srv, 100000); if (status != DMUB_STATUS_OK) - DRM_WARN("Wait for DMUB auto-load failed: %d\n", status); + drm_warn(adev_to_drm(adev), "Wait for DMUB auto-load failed: %d\n", status); } else { /* Perform the full hardware initialization. 
*/ r = dm_dmub_hw_init(adev); if (r) - DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); + drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r); } } @@ -1483,18 +1530,18 @@ static void dm_handle_hpd_rx_offload_work(struct work_struct *work) offload_work = container_of(work, struct hpd_rx_irq_offload_work, work); aconnector = offload_work->offload_wq->aconnector; + adev = offload_work->adev; if (!aconnector) { - DRM_ERROR("Can't retrieve aconnector in hpd_rx_irq_offload_work"); + drm_err(adev_to_drm(adev), "Can't retrieve aconnector in hpd_rx_irq_offload_work"); goto skip; } - adev = drm_to_adev(aconnector->base.dev); dc_link = aconnector->dc_link; mutex_lock(&aconnector->hpd_lock); if (!dc_link_detect_connection_type(dc_link, &new_connection_type)) - DRM_ERROR("KMS: Failed to detect connector\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to detect connector\n"); mutex_unlock(&aconnector->hpd_lock); if (new_connection_type == dc_connection_none) @@ -1563,8 +1610,9 @@ skip: } -static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct dc *dc) +static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct amdgpu_device *adev) { + struct dc *dc = adev->dm.dc; int max_caps = dc->caps.max_links; int i = 0; struct hpd_rx_irq_offload_work_queue *hpd_rx_offload_wq = NULL; @@ -1580,7 +1628,7 @@ static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct create_singlethread_workqueue("amdgpu_dm_hpd_rx_offload_wq"); if (hpd_rx_offload_wq[i].wq == NULL) { - DRM_ERROR("create amdgpu_dm_hpd_rx_offload_wq fail!"); + drm_err(adev_to_drm(adev), "create amdgpu_dm_hpd_rx_offload_wq fail!"); goto out_err; } @@ -1951,7 +1999,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) mutex_init(&adev->dm.audio_lock); if (amdgpu_dm_irq_init(adev)) { - DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize DM IRQ support.\n"); goto error; } @@ -2073,10 +2121,10 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.dc = dc_create(&init_data); if (adev->dm.dc) { - DRM_INFO("Display Core v%s initialized on %s\n", DC_VER, + drm_info(adev_to_drm(adev), "Display Core v%s initialized on %s\n", DC_VER, dce_version_to_string(adev->dm.dc->ctx->dce_version)); } else { - DRM_INFO("Display Core failed to initialize with v%s!\n", DC_VER); + drm_info(adev_to_drm(adev), "Display Core failed to initialize with v%s!\n", DC_VER); goto error; } @@ -2110,25 +2158,31 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.dc->debug.using_dml21 = true; } + if (amdgpu_dc_debug_mask & DC_HDCP_LC_FORCE_FW_ENABLE) + adev->dm.dc->debug.hdcp_lc_force_fw_enable = true; + + if (amdgpu_dc_debug_mask & DC_HDCP_LC_ENABLE_SW_FALLBACK) + adev->dm.dc->debug.hdcp_lc_enable_sw_fallback = true; + adev->dm.dc->debug.visual_confirm = amdgpu_dc_visual_confirm; /* TODO: Remove after DP2 receiver gets proper support of Cable ID feature */ adev->dm.dc->debug.ignore_cable_id = true; if (adev->dm.dc->caps.dp_hdmi21_pcon_support) - DRM_INFO("DP-HDMI FRL PCON supported\n"); + drm_info(adev_to_drm(adev), "DP-HDMI FRL PCON supported\n"); r = dm_dmub_hw_init(adev); if (r) { - DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); + drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r); goto error; } dc_hardware_init(adev->dm.dc); - adev->dm.hpd_rx_offload_wq = hpd_rx_irq_create_workqueue(adev->dm.dc); + adev->dm.hpd_rx_offload_wq = 
hpd_rx_irq_create_workqueue(adev); if (!adev->dm.hpd_rx_offload_wq) { - DRM_ERROR("amdgpu: failed to create hpd rx offload workqueue.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to create hpd rx offload workqueue.\n"); goto error; } @@ -2143,7 +2197,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.freesync_module = mod_freesync_create(adev->dm.dc); if (!adev->dm.freesync_module) { - DRM_ERROR( + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize freesync_module.\n"); } else DRM_DEBUG_DRIVER("amdgpu: freesync_module init done %p.\n", @@ -2155,7 +2209,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.vblank_control_workqueue = create_singlethread_workqueue("dm_vblank_control_workqueue"); if (!adev->dm.vblank_control_workqueue) - DRM_ERROR("amdgpu: failed to initialize vblank_workqueue.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize vblank_workqueue.\n"); } if (adev->dm.dc->caps.ips_support && @@ -2166,7 +2220,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.hdcp_workqueue = hdcp_create_workqueue(adev, &init_params.cp_psp, adev->dm.dc); if (!adev->dm.hdcp_workqueue) - DRM_ERROR("amdgpu: failed to initialize hdcp_workqueue.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize hdcp_workqueue.\n"); else DRM_DEBUG_DRIVER("amdgpu: hdcp_workqueue init done %p.\n", adev->dm.hdcp_workqueue); @@ -2176,20 +2230,29 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_completion(&adev->dm.dmub_aux_transfer_done); adev->dm.dmub_notify = kzalloc(sizeof(struct dmub_notification), GFP_KERNEL); if (!adev->dm.dmub_notify) { - DRM_INFO("amdgpu: fail to allocate adev->dm.dmub_notify"); + drm_info(adev_to_drm(adev), "amdgpu: fail to allocate adev->dm.dmub_notify"); goto error; } adev->dm.delayed_hpd_wq = create_singlethread_workqueue("amdgpu_dm_hpd_wq"); if (!adev->dm.delayed_hpd_wq) { - DRM_ERROR("amdgpu: failed to create hpd offload workqueue.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to create hpd offload workqueue.\n"); goto error; } amdgpu_dm_outbox_init(adev); if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_AUX_REPLY, dmub_aux_setconfig_callback, false)) { - DRM_ERROR("amdgpu: fail to register dmub aux callback"); + drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub aux callback"); + goto error; + } + + for (size_t i = 0; i < ARRAY_SIZE(adev->dm.fused_io); i++) + init_completion(&adev->dm.fused_io[i].replied); + + if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_FUSED_IO, + dmub_aux_fused_io_callback, false)) { + drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub fused io callback"); goto error; } /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. 
@@ -2206,7 +2269,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) } if (amdgpu_dm_initialize_drm_device(adev)) { - DRM_ERROR( + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize sw for display support.\n"); goto error; } @@ -2221,7 +2284,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev_to_drm(adev)->mode_config.cursor_height = adev->dm.dc->caps.max_cursor_size; if (drm_vblank_init(adev_to_drm(adev), adev->dm.display_indexes_num)) { - DRM_ERROR( + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize sw for display support.\n"); goto error; } @@ -2229,7 +2292,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) amdgpu_dm_crtc_secure_display_create_contexts(adev); if (!adev->dm.secure_display_ctx.crtc_ctx) - DRM_ERROR("amdgpu: failed to initialize secure display contexts.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize secure display contexts.\n"); if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(4, 0, 1)) adev->dm.secure_display_ctx.support_mul_roi = true; @@ -2409,7 +2472,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev) default: break; } - DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type); + drm_err(adev_to_drm(adev), "Unsupported ASIC type: 0x%X\n", adev->asic_type); return -EINVAL; } @@ -2427,7 +2490,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev) return 0; } if (r) { - dev_err(adev->dev, "amdgpu_dm: Can't validate firmware \"%s\"\n", + drm_err(adev_to_drm(adev), "amdgpu_dm: Can't validate firmware \"%s\"\n", fw_name_dmcu); amdgpu_ucode_release(&adev->dm.fw_dmcu); return r; @@ -2552,7 +2615,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->inst_const_bytes), PAGE_SIZE); - DRM_INFO("Loading DMUB firmware via PSP: version=0x%08X\n", + drm_info(adev_to_drm(adev), "Loading DMUB firmware via PSP: version=0x%08X\n", adev->dm.dmcub_fw_version); } @@ -2561,7 +2624,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) dmub_srv = adev->dm.dmub_srv; if (!dmub_srv) { - DRM_ERROR("Failed to allocate DMUB service!\n"); + drm_err(adev_to_drm(adev), "Failed to allocate DMUB service!\n"); return -ENOMEM; } @@ -2574,7 +2637,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) /* Create the DMUB service. 
*/ status = dmub_srv_create(dmub_srv, &create_params); if (status != DMUB_STATUS_OK) { - DRM_ERROR("Error creating DMUB service: %d\n", status); + drm_err(adev_to_drm(adev), "Error creating DMUB service: %d\n", status); return -EINVAL; } @@ -2599,7 +2662,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) ®ion_info); if (status != DMUB_STATUS_OK) { - DRM_ERROR("Error calculating DMUB region info: %d\n", status); + drm_err(adev_to_drm(adev), "Error calculating DMUB region info: %d\n", status); return -EINVAL; } @@ -2628,14 +2691,14 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) fb_info = adev->dm.dmub_fb_info; if (!fb_info) { - DRM_ERROR( + drm_err(adev_to_drm(adev), "Failed to allocate framebuffer info for DMUB service!\n"); return -ENOMEM; } status = dmub_srv_calc_mem_info(dmub_srv, &memory_params, fb_info); if (status != DMUB_STATUS_OK) { - DRM_ERROR("Error calculating DMUB FB info: %d\n", status); + drm_err(adev_to_drm(adev), "Error calculating DMUB FB info: %d\n", status); return -EINVAL; } @@ -2652,7 +2715,7 @@ static int dm_sw_init(struct amdgpu_ip_block *ip_block) adev->dm.cgs_device = amdgpu_cgs_create_device(adev); if (!adev->dm.cgs_device) { - DRM_ERROR("amdgpu: failed to create cgs device.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to create cgs device.\n"); return -EINVAL; } @@ -2958,7 +3021,7 @@ static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev) ret = amdgpu_dpm_write_watermarks_table(adev); if (ret) { - DRM_ERROR("Failed to update WMTABLE!\n"); + drm_err(adev_to_drm(adev), "Failed to update WMTABLE!\n"); return ret; } @@ -2976,13 +3039,13 @@ static int dm_oem_i2c_hw_init(struct amdgpu_device *adev) if (oem_ddc_service) { oem_i2c = create_i2c(oem_ddc_service, true); if (!oem_i2c) { - dev_info(adev->dev, "Failed to create oem i2c adapter data\n"); + drm_info(adev_to_drm(adev), "Failed to create oem i2c adapter data\n"); return -ENOMEM; } r = i2c_add_adapter(&oem_i2c->base); if (r) { - dev_info(adev->dev, "Failed to register oem i2c\n"); + drm_info(adev_to_drm(adev), "Failed to register oem i2c\n"); kfree(oem_i2c); return r; } @@ -3025,7 +3088,7 @@ static int dm_hw_init(struct amdgpu_ip_block *ip_block) r = dm_oem_i2c_hw_init(adev); if (r) - dev_info(adev->dev, "Failed to add OEM i2c bus\n"); + drm_info(adev_to_drm(adev), "Failed to add OEM i2c bus\n"); return 0; } @@ -3068,7 +3131,7 @@ static void dm_gpureset_toggle_interrupts(struct amdgpu_device *adev, irq_source = IRQ_TYPE_PFLIP + acrtc->otg_inst; rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY; if (rc) - DRM_WARN("Failed to %s pflip interrupts\n", + drm_warn(adev_to_drm(adev), "Failed to %s pflip interrupts\n", enable ? "enable" : "disable"); if (enable) { @@ -3078,14 +3141,14 @@ static void dm_gpureset_toggle_interrupts(struct amdgpu_device *adev, rc = amdgpu_dm_crtc_set_vupdate_irq(&acrtc->base, false); if (rc) - DRM_WARN("Failed to %sable vupdate interrupt\n", enable ? "en" : "dis"); + drm_warn(adev_to_drm(adev), "Failed to %sable vupdate interrupt\n", enable ? "en" : "dis"); irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst; /* During gpu-reset we disable and then enable vblank irq, so * don't use amdgpu_irq_get/put() to avoid refcount change. */ if (!dc_interrupt_set(adev->dm.dc, irq_source, enable)) - DRM_WARN("Failed to %sable vblank interrupt\n", enable ? "en" : "dis"); + drm_warn(adev_to_drm(adev), "Failed to %sable vblank interrupt\n", enable ? 
"en" : "dis"); } } @@ -3920,20 +3983,21 @@ static void handle_hpd_irq(void *param) } -static void schedule_hpd_rx_offload_work(struct hpd_rx_irq_offload_work_queue *offload_wq, +static void schedule_hpd_rx_offload_work(struct amdgpu_device *adev, struct hpd_rx_irq_offload_work_queue *offload_wq, union hpd_irq_data hpd_irq_data) { struct hpd_rx_irq_offload_work *offload_work = kzalloc(sizeof(*offload_work), GFP_KERNEL); if (!offload_work) { - DRM_ERROR("Failed to allocate hpd_rx_irq_offload_work.\n"); + drm_err(adev_to_drm(adev), "Failed to allocate hpd_rx_irq_offload_work.\n"); return; } INIT_WORK(&offload_work->work, dm_handle_hpd_rx_offload_work); offload_work->data = hpd_irq_data; offload_work->offload_wq = offload_wq; + offload_work->adev = adev; queue_work(offload_wq->wq, &offload_work->work); DRM_DEBUG_KMS("queue work to handle hpd_rx offload work"); @@ -3975,7 +4039,7 @@ static void handle_hpd_rx_irq(void *param) goto out; if (hpd_irq_data.bytes.device_service_irq.bits.AUTOMATED_TEST) { - schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data); + schedule_hpd_rx_offload_work(adev, offload_wq, hpd_irq_data); goto out; } @@ -3997,7 +4061,7 @@ static void handle_hpd_rx_irq(void *param) spin_unlock(&offload_wq->offload_lock); if (!skip) - schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data); + schedule_hpd_rx_offload_work(adev, offload_wq, hpd_irq_data); goto out; } @@ -4014,7 +4078,7 @@ static void handle_hpd_rx_irq(void *param) spin_unlock(&offload_wq->offload_lock); if (!skip) - schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data); + schedule_hpd_rx_offload_work(adev, offload_wq, hpd_irq_data); goto out; } @@ -4024,7 +4088,7 @@ out: if (result && !is_mst_root_connector) { /* Downstream Port status changed. */ if (!dc_link_detect_connection_type(dc_link, &new_connection_type)) - DRM_ERROR("KMS: Failed to detect connector\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to detect connector\n"); if (aconnector->base.force && new_connection_type == dc_connection_none) { emulated_link_detect(dc_link); @@ -4087,19 +4151,19 @@ static int register_hpd_handlers(struct amdgpu_device *adev) if (dc_is_dmub_outbox_supported(adev->dm.dc)) { if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) { - DRM_ERROR("amdgpu: fail to register dmub hpd callback"); + drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub hpd callback"); return -EINVAL; } if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) { - DRM_ERROR("amdgpu: fail to register dmub hpd callback"); + drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub hpd callback"); return -EINVAL; } if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_SENSE_NOTIFY, dmub_hpd_sense_callback, true)) { - DRM_ERROR("amdgpu: fail to register dmub hpd sense callback"); + drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub hpd sense callback"); return -EINVAL; } } @@ -4120,7 +4184,7 @@ static int register_hpd_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_HPD1 || int_params.irq_source > DC_IRQ_SOURCE_HPD6) { - DRM_ERROR("Failed to register hpd irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register hpd irq!\n"); return -EINVAL; } @@ -4138,7 +4202,7 @@ static int register_hpd_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_HPD1RX || int_params.irq_source > DC_IRQ_SOURCE_HPD6RX) { - DRM_ERROR("Failed 
to register hpd rx irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register hpd rx irq!\n"); return -EINVAL; } @@ -4180,7 +4244,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev) for (i = 0; i < adev->mode_info.num_crtc; i++) { r = amdgpu_irq_add_id(adev, client_id, i + 1, &adev->crtc_irq); if (r) { - DRM_ERROR("Failed to add crtc irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add crtc irq id!\n"); return r; } @@ -4191,7 +4255,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_VBLANK1 || int_params.irq_source > DC_IRQ_SOURCE_VBLANK6) { - DRM_ERROR("Failed to register vblank irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register vblank irq!\n"); return -EINVAL; } @@ -4210,7 +4274,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev) i <= VISLANDS30_IV_SRCID_D6_GRPH_PFLIP; i += 2) { r = amdgpu_irq_add_id(adev, client_id, i, &adev->pageflip_irq); if (r) { - DRM_ERROR("Failed to add page flip irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add page flip irq id!\n"); return r; } @@ -4221,7 +4285,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_PFLIP_FIRST || int_params.irq_source > DC_IRQ_SOURCE_PFLIP_LAST) { - DRM_ERROR("Failed to register pflip irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register pflip irq!\n"); return -EINVAL; } @@ -4239,7 +4303,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev) r = amdgpu_irq_add_id(adev, client_id, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq); if (r) { - DRM_ERROR("Failed to add hpd irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add hpd irq id!\n"); return r; } @@ -4281,7 +4345,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) for (i = VISLANDS30_IV_SRCID_D1_VERTICAL_INTERRUPT0; i <= VISLANDS30_IV_SRCID_D6_VERTICAL_INTERRUPT0; i++) { r = amdgpu_irq_add_id(adev, client_id, i, &adev->crtc_irq); if (r) { - DRM_ERROR("Failed to add crtc irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add crtc irq id!\n"); return r; } @@ -4292,7 +4356,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_VBLANK1 || int_params.irq_source > DC_IRQ_SOURCE_VBLANK6) { - DRM_ERROR("Failed to register vblank irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register vblank irq!\n"); return -EINVAL; } @@ -4310,7 +4374,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) for (i = VISLANDS30_IV_SRCID_D1_V_UPDATE_INT; i <= VISLANDS30_IV_SRCID_D6_V_UPDATE_INT; i += 2) { r = amdgpu_irq_add_id(adev, client_id, i, &adev->vupdate_irq); if (r) { - DRM_ERROR("Failed to add vupdate irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add vupdate irq id!\n"); return r; } @@ -4321,7 +4385,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_VUPDATE1 || int_params.irq_source > DC_IRQ_SOURCE_VUPDATE6) { - DRM_ERROR("Failed to register vupdate irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register vupdate irq!\n"); return -EINVAL; } @@ -4340,7 +4404,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) i <= VISLANDS30_IV_SRCID_D6_GRPH_PFLIP; i += 2) { r = amdgpu_irq_add_id(adev, client_id, i, 
&adev->pageflip_irq); if (r) { - DRM_ERROR("Failed to add page flip irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add page flip irq id!\n"); return r; } @@ -4351,7 +4415,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_PFLIP_FIRST || int_params.irq_source > DC_IRQ_SOURCE_PFLIP_LAST) { - DRM_ERROR("Failed to register pflip irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register pflip irq!\n"); return -EINVAL; } @@ -4369,7 +4433,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) r = amdgpu_irq_add_id(adev, client_id, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq); if (r) { - DRM_ERROR("Failed to add hpd irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add hpd irq id!\n"); return r; } @@ -4419,7 +4483,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->crtc_irq); if (r) { - DRM_ERROR("Failed to add crtc irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add crtc irq id!\n"); return r; } @@ -4430,7 +4494,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_VBLANK1 || int_params.irq_source > DC_IRQ_SOURCE_VBLANK6) { - DRM_ERROR("Failed to register vblank irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register vblank irq!\n"); return -EINVAL; } @@ -4451,7 +4515,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) vrtl_int_srcid[i], &adev->vline0_irq); if (r) { - DRM_ERROR("Failed to add vline0 irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add vline0 irq id!\n"); return r; } @@ -4462,7 +4526,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_DC1_VLINE0 || int_params.irq_source > DC_IRQ_SOURCE_DC6_VLINE0) { - DRM_ERROR("Failed to register vline0 irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register vline0 irq!\n"); return -EINVAL; } @@ -4490,7 +4554,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->vupdate_irq); if (r) { - DRM_ERROR("Failed to add vupdate irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add vupdate irq id!\n"); return r; } @@ -4501,7 +4565,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_VUPDATE1 || int_params.irq_source > DC_IRQ_SOURCE_VUPDATE6) { - DRM_ERROR("Failed to register vupdate irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register vupdate irq!\n"); return -EINVAL; } @@ -4521,7 +4585,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) i++) { r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->pageflip_irq); if (r) { - DRM_ERROR("Failed to add page flip irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add page flip irq id!\n"); return r; } @@ -4532,7 +4596,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_PFLIP_FIRST || int_params.irq_source > DC_IRQ_SOURCE_PFLIP_LAST) { - DRM_ERROR("Failed to register pflip irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register pflip irq!\n"); return -EINVAL; } @@ -4550,7 +4614,7 @@ static int 
dcn10_register_irq_handlers(struct amdgpu_device *adev) r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DC_HPD1_INT, &adev->hpd_irq); if (r) { - DRM_ERROR("Failed to add hpd irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add hpd irq id!\n"); return r; } @@ -4572,7 +4636,7 @@ static int register_outbox_irq_handlers(struct amdgpu_device *adev) r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT, &adev->dmub_outbox_irq); if (r) { - DRM_ERROR("Failed to add outbox irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add outbox irq id!\n"); return r; } @@ -4804,41 +4868,54 @@ static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps, return 1; } -static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *caps, - uint32_t brightness) +static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *caps, + uint32_t *brightness) { - unsigned int min, max; u8 prev_signal = 0, prev_lum = 0; + int i = 0; - if (!get_brightness_range(caps, &min, &max)) - return brightness; - - for (int i = 0; i < caps->data_points; i++) { - u8 signal, lum; + if (amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE) + return; - if (amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE) - break; + if (!caps->data_points) + return; - signal = caps->luminance_data[i].input_signal; - lum = caps->luminance_data[i].luminance; + /* choose start to run less interpolation steps */ + if (caps->luminance_data[caps->data_points/2].input_signal > *brightness) + i = caps->data_points/2; + do { + u8 signal = caps->luminance_data[i].input_signal; + u8 lum = caps->luminance_data[i].luminance; /* * brightness == signal: luminance is percent numerator * brightness < signal: interpolate between previous and current luminance numerator * brightness > signal: find next data point */ - if (brightness < signal) - lum = prev_lum + DIV_ROUND_CLOSEST((lum - prev_lum) * - (brightness - prev_signal), - signal - prev_signal); - else if (brightness > signal) { + if (*brightness > signal) { prev_signal = signal; prev_lum = lum; + i++; continue; } - brightness = DIV_ROUND_CLOSEST(lum * brightness, 101); - break; - } + if (*brightness < signal) + lum = prev_lum + DIV_ROUND_CLOSEST((lum - prev_lum) * + (*brightness - prev_signal), + signal - prev_signal); + *brightness = DIV_ROUND_CLOSEST(lum * *brightness, 101); + return; + } while (i < caps->data_points); +} + +static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *caps, + uint32_t brightness) +{ + unsigned int min, max; + + if (!get_brightness_range(caps, &min, &max)) + return brightness; + + convert_custom_brightness(caps, &brightness); // Rescale 0..255 to min..max return min + DIV_ROUND_CLOSEST((max - min) * brightness, @@ -5017,7 +5094,7 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) dm->brightness[aconnector->bl_idx] = props.brightness; if (IS_ERR(dm->backlight_dev[aconnector->bl_idx])) { - DRM_ERROR("DM: Backlight registration failed!\n"); + drm_err(drm, "DM: Backlight registration failed!\n"); dm->backlight_dev[aconnector->bl_idx] = NULL; } else DRM_DEBUG_DRIVER("DM: Registered Backlight device: %s\n", bl_name); @@ -5034,7 +5111,7 @@ static int initialize_plane(struct amdgpu_display_manager *dm, plane = kzalloc(sizeof(struct drm_plane), GFP_KERNEL); if (!plane) { - DRM_ERROR("KMS: Failed to allocate plane\n"); + drm_err(adev_to_drm(dm->adev), "KMS: Failed to allocate plane\n"); return -ENOMEM; } 
plane->type = plane_type; @@ -5052,7 +5129,7 @@ static int initialize_plane(struct amdgpu_display_manager *dm, ret = amdgpu_dm_plane_init(dm, plane, possible_crtcs, plane_cap); if (ret) { - DRM_ERROR("KMS: Failed to initialize plane\n"); + drm_err(adev_to_drm(dm->adev), "KMS: Failed to initialize plane\n"); kfree(plane); return ret; } @@ -5121,14 +5198,14 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) link_cnt = dm->dc->caps.max_links; if (amdgpu_dm_mode_config_init(dm->adev)) { - DRM_ERROR("DM: Failed to initialize mode config\n"); + drm_err(adev_to_drm(adev), "DM: Failed to initialize mode config\n"); return -EINVAL; } /* There is one primary plane per CRTC */ primary_planes = dm->dc->caps.max_streams; if (primary_planes > AMDGPU_MAX_PLANES) { - DRM_ERROR("DM: Plane nums out of 6 planes\n"); + drm_err(adev_to_drm(adev), "DM: Plane nums out of 6 planes\n"); return -EINVAL; } @@ -5141,7 +5218,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) if (initialize_plane(dm, mode_info, i, DRM_PLANE_TYPE_PRIMARY, plane)) { - DRM_ERROR("KMS: Failed to initialize primary plane\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to initialize primary plane\n"); goto fail; } } @@ -5173,14 +5250,14 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) if (initialize_plane(dm, NULL, primary_planes + i, DRM_PLANE_TYPE_OVERLAY, plane)) { - DRM_ERROR("KMS: Failed to initialize overlay plane\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to initialize overlay plane\n"); goto fail; } } for (i = 0; i < dm->dc->caps.max_streams; i++) if (amdgpu_dm_crtc_init(dm, mode_info->planes[i], i)) { - DRM_ERROR("KMS: Failed to initialize crtc\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to initialize crtc\n"); goto fail; } @@ -5200,7 +5277,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) case IP_VERSION(3, 6, 0): case IP_VERSION(4, 0, 1): if (register_outbox_irq_handlers(dm->adev)) { - DRM_ERROR("DM: Failed to initialize IRQ\n"); + drm_err(adev_to_drm(adev), "DM: Failed to initialize IRQ\n"); goto fail; } break; @@ -5250,7 +5327,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) } if (link_cnt > MAX_LINKS) { - DRM_ERROR( + drm_err(adev_to_drm(adev), "KMS: Cannot support more than %d display indexes\n", MAX_LINKS); goto fail; @@ -5266,12 +5343,12 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) struct amdgpu_dm_wb_connector *wbcon = kzalloc(sizeof(*wbcon), GFP_KERNEL); if (!wbcon) { - DRM_ERROR("KMS: Failed to allocate writeback connector\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to allocate writeback connector\n"); continue; } if (amdgpu_dm_wb_connector_init(dm, wbcon, i)) { - DRM_ERROR("KMS: Failed to initialize writeback connector\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to initialize writeback connector\n"); kfree(wbcon); continue; } @@ -5291,12 +5368,12 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) goto fail; if (amdgpu_dm_encoder_init(dm->ddev, aencoder, i)) { - DRM_ERROR("KMS: Failed to initialize encoder\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to initialize encoder\n"); goto fail; } if (amdgpu_dm_connector_init(dm, aconnector, i, aencoder)) { - DRM_ERROR("KMS: Failed to initialize connector\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to initialize connector\n"); goto fail; } @@ -5305,7 +5382,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) aconnector; if (!dc_link_detect_connection_type(link, 
&new_connection_type)) - DRM_ERROR("KMS: Failed to detect connector\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to detect connector\n"); if (aconnector->base.force && new_connection_type == dc_connection_none) { emulated_link_detect(link); @@ -5327,8 +5404,15 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) if (amdgpu_dm_set_replay_caps(link, aconnector)) psr_feature_enabled = false; - if (psr_feature_enabled) + if (psr_feature_enabled) { amdgpu_dm_set_psr_caps(link); + drm_info(adev_to_drm(adev), "PSR support %d, DC PSR ver %d, sink PSR ver %d DPCD caps 0x%x su_y_granularity %d\n", + link->psr_settings.psr_feature_enabled, + link->psr_settings.psr_version, + link->dpcd_caps.psr_info.psr_version, + link->dpcd_caps.psr_info.psr_dpcd_caps.raw, + link->dpcd_caps.psr_info.psr2_su_y_granularity_cap); + } } } amdgpu_set_panel_orientation(&aconnector->base); @@ -5342,7 +5426,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) case CHIP_VERDE: case CHIP_OLAND: if (dce60_register_irq_handlers(dm->adev)) { - DRM_ERROR("DM: Failed to initialize IRQ\n"); + drm_err(adev_to_drm(adev), "DM: Failed to initialize IRQ\n"); goto fail; } break; @@ -5364,7 +5448,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) case CHIP_VEGA12: case CHIP_VEGA20: if (dce110_register_irq_handlers(dm->adev)) { - DRM_ERROR("DM: Failed to initialize IRQ\n"); + drm_err(adev_to_drm(adev), "DM: Failed to initialize IRQ\n"); goto fail; } break; @@ -5392,12 +5476,12 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) case IP_VERSION(3, 6, 0): case IP_VERSION(4, 0, 1): if (dcn10_register_irq_handlers(dm->adev)) { - DRM_ERROR("DM: Failed to initialize IRQ\n"); + drm_err(adev_to_drm(adev), "DM: Failed to initialize IRQ\n"); goto fail; } break; default: - DRM_ERROR("Unsupported DCE IP versions: 0x%X\n", + drm_err(adev_to_drm(adev), "Unsupported DCE IP versions: 0x%X\n", amdgpu_ip_version(adev, DCE_HWIP, 0)); goto fail; } @@ -5558,7 +5642,7 @@ static int dm_early_init(struct amdgpu_ip_block *ip_block) /* if there is no object header, skip DM */ if (!amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) { adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK; - dev_info(adev->dev, "No object header, skipping DM\n"); + drm_info(adev_to_drm(adev), "No object header, skipping DM\n"); return -ENOENT; } @@ -5670,7 +5754,7 @@ static int dm_early_init(struct amdgpu_ip_block *ip_block) adev->mode_info.num_dig = 4; break; default: - DRM_ERROR("Unsupported DCE IP versions: 0x%x\n", + drm_err(adev_to_drm(adev), "Unsupported DCE IP versions: 0x%x\n", amdgpu_ip_version(adev, DCE_HWIP, 0)); return -EINVAL; } @@ -5819,7 +5903,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616; break; default: - DRM_ERROR( + drm_err(adev_to_drm(adev), "Unsupported screen format %p4cc\n", &fb->format->format); return -EINVAL; @@ -6340,6 +6424,7 @@ static void fill_stream_properties_from_drm_display_mode( struct amdgpu_dm_connector *aconnector = NULL; struct hdmi_vendor_infoframe hv_frame; struct hdmi_avi_infoframe avi_frame; + ssize_t err; if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) aconnector = to_amdgpu_dm_connector(connector); @@ -6386,9 +6471,17 @@ static void fill_stream_properties_from_drm_display_mode( } if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) { - drm_hdmi_avi_infoframe_from_display_mode(&avi_frame, (struct drm_connector *)connector, mode_in); + err = 
drm_hdmi_avi_infoframe_from_display_mode(&avi_frame, + (struct drm_connector *)connector, + mode_in); + if (err < 0) + drm_warn_once(connector->dev, "Failed to setup avi infoframe on connector %s: %zd \n", connector->name, err); timing_out->vic = avi_frame.video_code; - drm_hdmi_vendor_infoframe_from_display_mode(&hv_frame, (struct drm_connector *)connector, mode_in); + err = drm_hdmi_vendor_infoframe_from_display_mode(&hv_frame, + (struct drm_connector *)connector, + mode_in); + if (err < 0) + drm_warn_once(connector->dev, "Failed to setup vendor infoframe on connector %s: %zd \n", connector->name, err); timing_out->hdmi_vic = hv_frame.vic; } @@ -6513,7 +6606,7 @@ decide_crtc_timing_for_drm_display_mode(struct drm_display_mode *drm_mode, } static struct dc_sink * -create_fake_sink(struct dc_link *link) +create_fake_sink(struct drm_device *dev, struct dc_link *link) { struct dc_sink_init_data sink_init_data = { 0 }; struct dc_sink *sink = NULL; @@ -6523,7 +6616,7 @@ create_fake_sink(struct dc_link *link) sink = dc_sink_create(&sink_init_data); if (!sink) { - DRM_ERROR("Failed to create sink!\n"); + drm_err(dev, "Failed to create sink!\n"); return NULL; } sink->sink_signal = SIGNAL_TYPE_VIRTUAL; @@ -6879,6 +6972,7 @@ create_stream_for_sink(struct drm_connector *connector, const struct dc_stream_state *old_stream, int requested_bpc) { + struct drm_device *dev = connector->dev; struct amdgpu_dm_connector *aconnector = NULL; struct drm_display_mode *preferred_mode = NULL; const struct drm_connector_state *con_state = &dm_state->base; @@ -6901,11 +6995,6 @@ create_stream_for_sink(struct drm_connector *connector, drm_mode_init(&mode, drm_mode); memset(&saved_mode, 0, sizeof(saved_mode)); - if (connector == NULL) { - DRM_ERROR("connector is NULL!\n"); - return stream; - } - if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) { aconnector = NULL; aconnector = to_amdgpu_dm_connector(connector); @@ -6920,7 +7009,7 @@ create_stream_for_sink(struct drm_connector *connector, } if (!aconnector || !aconnector->dc_sink) { - sink = create_fake_sink(link); + sink = create_fake_sink(dev, link); if (!sink) return stream; @@ -6932,7 +7021,7 @@ create_stream_for_sink(struct drm_connector *connector, stream = dc_create_stream_for_sink(sink); if (stream == NULL) { - DRM_ERROR("Failed to create stream for sink!\n"); + drm_err(dev, "Failed to create stream for sink!\n"); goto finish; } @@ -7414,6 +7503,7 @@ static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector) struct dc_sink *dc_em_sink = aconnector->dc_em_sink; const struct drm_edid *drm_edid; struct i2c_adapter *ddc; + struct drm_device *dev = connector->dev; if (dc_link && dc_link->aux_mode) ddc = &aconnector->dm_dp_aux.aux.ddc; @@ -7423,7 +7513,7 @@ static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector) drm_edid = drm_edid_read_ddc(connector, ddc); drm_edid_connector_update(connector, drm_edid); if (!drm_edid) { - DRM_ERROR("No EDID found on connector: %s.\n", connector->name); + drm_err(dev, "No EDID found on connector: %s.\n", connector->name); return; } @@ -7482,7 +7572,7 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector) drm_edid = drm_edid_read_ddc(connector, ddc); drm_edid_connector_update(connector, drm_edid); if (!drm_edid) { - DRM_ERROR("No EDID found on connector: %s.\n", connector->name); + drm_err(connector->dev, "No EDID found on connector: %s.\n", connector->name); return; } @@ -7616,7 +7706,7 @@ create_validate_stream_for_sink(struct drm_connector *connector, dm_state, 
old_stream, requested_bpc); if (stream == NULL) { - DRM_ERROR("Failed to create stream for sink!\n"); + drm_err(adev_to_drm(adev), "Failed to create stream for sink!\n"); break; } @@ -7691,7 +7781,7 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec if (dc_sink == NULL && aconnector->base.force != DRM_FORCE_ON_DIGITAL && aconnector->base.force != DRM_FORCE_ON) { - DRM_ERROR("dc_sink is NULL!\n"); + drm_err(connector->dev, "dc_sink is NULL!\n"); goto fail; } @@ -8599,7 +8689,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, i2c = create_i2c(link->ddc, false); if (!i2c) { - DRM_ERROR("Failed to create i2c adapter data\n"); + drm_err(adev_to_drm(dm->adev), "Failed to create i2c adapter data\n"); return -ENOMEM; } @@ -8607,7 +8697,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, res = i2c_add_adapter(&i2c->base); if (res) { - DRM_ERROR("Failed to register hw i2c %d\n", link->link_index); + drm_err(adev_to_drm(dm->adev), "Failed to register hw i2c %d\n", link->link_index); goto out_free; } @@ -8621,7 +8711,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, &i2c->base); if (res) { - DRM_ERROR("connector_init failed\n"); + drm_err(adev_to_drm(dm->adev), "connector_init failed\n"); aconnector->connector_id = -1; goto out_free; } @@ -9206,13 +9296,13 @@ static void amdgpu_dm_update_cursor(struct drm_plane *plane, if (crtc_state->stream) { if (!dc_stream_set_cursor_attributes(crtc_state->stream, &attributes)) - DRM_ERROR("DC failed to set cursor attributes\n"); + drm_err(adev_to_drm(adev), "DC failed to set cursor attributes\n"); update->cursor_attributes = &crtc_state->stream->cursor_attributes; if (!dc_stream_set_cursor_position(crtc_state->stream, &position)) - DRM_ERROR("DC failed to set cursor position\n"); + drm_err(adev_to_drm(adev), "DC failed to set cursor position\n"); update->cursor_position = &crtc_state->stream->cursor_position; } @@ -9463,7 +9553,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->surface_updates[planes_count].surface = dc_plane; if (!bundle->surface_updates[planes_count].surface) { - DRM_ERROR("No surface for CRTC: id=%d\n", + drm_err(dev, "No surface for CRTC: id=%d\n", acrtc_attach->crtc_id); continue; } @@ -9979,20 +10069,20 @@ static void dm_set_writeback(struct amdgpu_display_manager *dm, wb_info = kzalloc(sizeof(*wb_info), GFP_KERNEL); if (!wb_info) { - DRM_ERROR("Failed to allocate wb_info\n"); + drm_err(adev_to_drm(adev), "Failed to allocate wb_info\n"); return; } acrtc = to_amdgpu_crtc(wb_conn->encoder.crtc); if (!acrtc) { - DRM_ERROR("no amdgpu_crtc found\n"); + drm_err(adev_to_drm(adev), "no amdgpu_crtc found\n"); kfree(wb_info); return; } afb = to_amdgpu_framebuffer(new_con_state->writeback_job->fb); if (!afb) { - DRM_ERROR("No amdgpu_framebuffer found\n"); + drm_err(adev_to_drm(adev), "No amdgpu_framebuffer found\n"); kfree(wb_info); return; } @@ -10213,7 +10303,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) new_con_state->content_protection >= DRM_MODE_CONTENT_PROTECTION_DESIRED) enable_encryption = true; - DRM_INFO("[HDCP_DM] hdcp_update_display enable_encryption = %x\n", enable_encryption); + drm_info(adev_to_drm(adev), "[HDCP_DM] hdcp_update_display enable_encryption = %x\n", enable_encryption); if (aconnector->dc_link) hdcp_update_display( @@ -10305,7 +10395,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) */ dummy_updates = kzalloc(sizeof(struct 
dc_surface_update) * MAX_SURFACES, GFP_ATOMIC); if (!dummy_updates) { - DRM_ERROR("Failed to allocate memory for dummy_updates.\n"); + drm_err(adev_to_drm(adev), "Failed to allocate memory for dummy_updates.\n"); continue; } for (j = 0; j < status->plane_count; j++) @@ -10513,16 +10603,20 @@ static int dm_force_atomic_commit(struct drm_connector *connector) */ conn_state = drm_atomic_get_connector_state(state, connector); - ret = PTR_ERR_OR_ZERO(conn_state); - if (ret) + /* Check for error in getting connector state */ + if (IS_ERR(conn_state)) { + ret = PTR_ERR(conn_state); goto out; + } /* Attach crtc to drm_atomic_state*/ crtc_state = drm_atomic_get_crtc_state(state, &disconnected_acrtc->base); - ret = PTR_ERR_OR_ZERO(crtc_state); - if (ret) + /* Check for error in getting crtc state */ + if (IS_ERR(crtc_state)) { + ret = PTR_ERR(crtc_state); goto out; + } /* force a restore */ crtc_state->mode_changed = true; @@ -10530,9 +10624,11 @@ static int dm_force_atomic_commit(struct drm_connector *connector) /* Attach plane to drm_atomic_state */ plane_state = drm_atomic_get_plane_state(state, plane); - ret = PTR_ERR_OR_ZERO(plane_state); - if (ret) + /* Check for error in getting plane state */ + if (IS_ERR(plane_state)) { + ret = PTR_ERR(plane_state); goto out; + } /* Call commit internally with the state we just constructed */ ret = drm_atomic_commit(state); @@ -10540,7 +10636,7 @@ static int dm_force_atomic_commit(struct drm_connector *connector) out: drm_atomic_state_put(state); if (ret) - DRM_ERROR("Restoring old state failed with %i\n", ret); + drm_err(ddev, "Restoring old state failed with %i\n", ret); return ret; } @@ -10624,7 +10720,7 @@ static int do_aquire_global_lock(struct drm_device *dev, &commit->flip_done, 10*HZ); if (ret == 0) - DRM_ERROR("[CRTC:%d:%s] hw_done or flip_done timed out\n", + drm_err(dev, "[CRTC:%d:%s] hw_done or flip_done timed out\n", crtc->base.id, crtc->name); drm_crtc_commit_put(commit); @@ -10769,8 +10865,8 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, drm_old_conn_state = drm_atomic_get_old_connector_state(state, connector); - if (IS_ERR(drm_new_conn_state)) { - ret = PTR_ERR_OR_ZERO(drm_new_conn_state); + if (WARN_ON(!drm_new_conn_state)) { + ret = -EINVAL; goto fail; } @@ -11743,7 +11839,7 @@ static bool amdgpu_dm_crtc_mem_type_changed(struct drm_device *dev, old_plane_state = drm_atomic_get_plane_state(state, plane); if (IS_ERR(new_plane_state) || IS_ERR(old_plane_state)) { - DRM_ERROR("Failed to get plane state for plane %s\n", plane->name); + drm_err(dev, "Failed to get plane state for plane %s\n", plane->name); return false; } @@ -12312,7 +12408,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm, res = dc_wake_and_execute_dmub_cmd(dm->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY); if (!res) { - DRM_ERROR("EDID CEA parser failed\n"); + drm_err(adev_to_drm(dm->adev), "EDID CEA parser failed\n"); return false; } @@ -12320,7 +12416,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm, if (output->type == DMUB_CMD__EDID_CEA_ACK) { if (!output->ack.success) { - DRM_ERROR("EDID CEA ack failed at offset %d\n", + drm_err(adev_to_drm(dm->adev), "EDID CEA ack failed at offset %d\n", output->ack.offset); } } else if (output->type == DMUB_CMD__EDID_CEA_AMD_VSDB) { @@ -12332,7 +12428,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm, vsdb->min_refresh_rate_hz = output->amd_vsdb.min_frame_rate; vsdb->max_refresh_rate_hz = output->amd_vsdb.max_frame_rate; } else { - 
DRM_WARN("Unknown EDID CEA parser results\n"); + drm_warn(adev_to_drm(dm->adev), "Unknown EDID CEA parser results\n"); return false; } @@ -12548,7 +12644,7 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, enum adaptive_sync_type as_type = ADAPTIVE_SYNC_TYPE_NONE; if (!connector->state) { - DRM_ERROR("%s - Connector has no state", __func__); + drm_err(adev_to_drm(adev), "%s - Connector has no state", __func__); goto update; } @@ -12733,7 +12829,7 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( } if (!wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) { - DRM_ERROR("wait_for_completion_timeout timeout!"); + drm_err(adev_to_drm(adev), "wait_for_completion_timeout timeout!"); *operation_result = AUX_RET_ERROR_TIMEOUT; goto out; } @@ -12743,8 +12839,8 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( * Transient states before tunneling is enabled could * lead to this error. We can ignore this for now. */ - if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) { - DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n", + if (p_notify->result == AUX_RET_ERROR_PROTOCOL_ERROR) { + drm_warn(adev_to_drm(adev), "DPIA AUX failed on 0x%x(%d), error %d\n", payload->address, payload->length, p_notify->result); } @@ -12752,22 +12848,14 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( goto out; } + payload->reply[0] = adev->dm.dmub_notify->aux_reply.command & 0xF; + if (adev->dm.dmub_notify->aux_reply.command & 0xF0) + /* The reply is stored in the top nibble of the command. */ + payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF; - payload->reply[0] = adev->dm.dmub_notify->aux_reply.command; - if (!payload->write && p_notify->aux_reply.length && - (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) { - - if (payload->length != p_notify->aux_reply.length) { - DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n", - p_notify->aux_reply.length, - payload->address, payload->length); - *operation_result = AUX_RET_ERROR_INVALID_REPLY; - goto out; - } - + if (!payload->write && p_notify->aux_reply.length) memcpy(payload->data, p_notify->aux_reply.data, p_notify->aux_reply.length); - } /* success */ ret = p_notify->aux_reply.length; @@ -12778,6 +12866,79 @@ out: return ret; } +static void abort_fused_io( + struct dc_context *ctx, + const struct dmub_cmd_fused_request *request +) +{ + union dmub_rb_cmd command = { 0 }; + struct dmub_rb_cmd_fused_io *io = &command.fused_io; + + io->header.type = DMUB_CMD__FUSED_IO; + io->header.sub_type = DMUB_CMD__FUSED_IO_ABORT; + io->header.payload_bytes = sizeof(*io) - sizeof(io->header); + io->request = *request; + dm_execute_dmub_cmd(ctx, &command, DM_DMUB_WAIT_TYPE_NO_WAIT); +} + +static bool execute_fused_io( + struct amdgpu_device *dev, + struct dc_context *ctx, + union dmub_rb_cmd *commands, + uint8_t count, + uint32_t timeout_us +) +{ + const uint8_t ddc_line = commands[0].fused_io.request.u.aux.ddc_line; + + if (ddc_line >= ARRAY_SIZE(dev->dm.fused_io)) + return false; + + struct fused_io_sync *sync = &dev->dm.fused_io[ddc_line]; + struct dmub_rb_cmd_fused_io *first = &commands[0].fused_io; + const bool result = dm_execute_dmub_cmd_list(ctx, count, commands, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) + && first->header.ret_status + && first->request.status == FUSED_REQUEST_STATUS_SUCCESS; + + if (!result) + return false; + + while (wait_for_completion_timeout(&sync->replied, usecs_to_jiffies(timeout_us))) { + reinit_completion(&sync->replied); + + struct dmub_cmd_fused_request *reply = (struct 
dmub_cmd_fused_request *) sync->reply_data; + + static_assert(sizeof(*reply) <= sizeof(sync->reply_data), "Size mismatch"); + + if (reply->identifier == first->request.identifier) { + first->request = *reply; + return true; + } + } + + reinit_completion(&sync->replied); + first->request.status = FUSED_REQUEST_STATUS_TIMEOUT; + abort_fused_io(ctx, &first->request); + return false; +} + +bool amdgpu_dm_execute_fused_io( + struct amdgpu_device *dev, + struct dc_link *link, + union dmub_rb_cmd *commands, + uint8_t count, + uint32_t timeout_us) +{ + struct amdgpu_display_manager *dm = &dev->dm; + + mutex_lock(&dm->dpia_aux_lock); + + const bool result = execute_fused_io(dev, link->ctx, commands, count, timeout_us); + + mutex_unlock(&dm->dpia_aux_lock); + return result; +} + int amdgpu_dm_process_dmub_set_config_sync( struct dc_context *ctx, unsigned int link_index, @@ -12796,7 +12957,7 @@ int amdgpu_dm_process_dmub_set_config_sync( ret = 0; *operation_result = adev->dm.dmub_notify->sc_status; } else { - DRM_ERROR("wait_for_completion_timeout timeout!"); + drm_err(adev_to_drm(adev), "wait_for_completion_timeout timeout!"); ret = -1; *operation_result = SET_CONFIG_UNKNOWN_ERROR; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 385faaca6e26..4b615071e93b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -50,7 +50,7 @@ #define AMDGPU_DM_MAX_NUM_EDP 2 -#define AMDGPU_DMUB_NOTIFICATION_MAX 7 +#define AMDGPU_DMUB_NOTIFICATION_MAX 8 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x00001A #define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40 @@ -81,6 +81,7 @@ struct amdgpu_bo; struct dmub_srv; struct dc_plane_state; struct dmub_notification; +struct dmub_cmd_fused_request; struct amd_vsdb_block { unsigned char ieee_id[3]; @@ -276,6 +277,10 @@ struct hpd_rx_irq_offload_work { * @offload_wq: offload work queue that this work is queued to */ struct hpd_rx_irq_offload_work_queue *offload_wq; + /** + * @adev: amdgpu_device pointer + */ + struct amdgpu_device *adev; }; /** @@ -633,6 +638,16 @@ struct amdgpu_display_manager { * OEM i2c bus */ struct amdgpu_i2c_adapter *oem_i2c; + + /** + * @fused_io: + * + * dmub fused io interface + */ + struct fused_io_sync { + struct completion replied; + char reply_data[0x40]; // Cannot include dmub_cmd here + } fused_io[8]; }; enum dsc_clock_force_state { @@ -1012,6 +1027,14 @@ extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs; int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int link_index, struct aux_payload *payload, enum aux_return_code_type *operation_result); +bool amdgpu_dm_execute_fused_io( + struct amdgpu_device *dev, + struct dc_link *link, + union dmub_rb_cmd *commands, + uint8_t count, + uint32_t timeout_us +); + int amdgpu_dm_process_dmub_set_config_sync(struct dc_context *ctx, unsigned int link_index, struct set_config_cmd_payload *payload, enum set_config_status *operation_result); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 8f22ad966543..c16962256514 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -26,6 +26,7 @@ #include "amdgpu_dm_hdcp.h" #include "amdgpu.h" #include "amdgpu_dm.h" +#include "dc_fused_io.h" #include "dm_helpers.h" #include <drm/display/drm_hdcp_helper.h> 
#include "hdcp_psp.h" @@ -76,6 +77,34 @@ lp_read_dpcd(void *handle, uint32_t address, uint8_t *data, uint32_t size) return dm_helpers_dp_read_dpcd(link->ctx, link, address, data, size); } +static bool lp_atomic_write_poll_read_i2c( + void *handle, + const struct mod_hdcp_atomic_op_i2c *write, + const struct mod_hdcp_atomic_op_i2c *poll, + struct mod_hdcp_atomic_op_i2c *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +) +{ + struct dc_link *link = handle; + + return dm_atomic_write_poll_read_i2c(link, write, poll, read, poll_timeout_us, poll_mask_msb); +} + +static bool lp_atomic_write_poll_read_aux( + void *handle, + const struct mod_hdcp_atomic_op_aux *write, + const struct mod_hdcp_atomic_op_aux *poll, + struct mod_hdcp_atomic_op_aux *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +) +{ + struct dc_link *link = handle; + + return dm_atomic_write_poll_read_aux(link, write, poll, read, poll_timeout_us, poll_mask_msb); +} + static uint8_t *psp_get_srm(struct psp_context *psp, uint32_t *srm_version, uint32_t *srm_size) { struct ta_hdcp_shared_memory *hdcp_cmd; @@ -732,7 +761,10 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, INIT_DELAYED_WORK(&hdcp_work[i].watchdog_timer_dwork, event_watchdog_timer); INIT_DELAYED_WORK(&hdcp_work[i].property_validate_dwork, event_property_validate); - hdcp_work[i].hdcp.config.psp.handle = &adev->psp; + struct mod_hdcp_config *config = &hdcp_work[i].hdcp.config; + struct mod_hdcp_ddc_funcs *ddc_funcs = &config->ddc.funcs; + + config->psp.handle = &adev->psp; if (dc->ctx->dce_version == DCN_VERSION_3_1 || dc->ctx->dce_version == DCN_VERSION_3_14 || dc->ctx->dce_version == DCN_VERSION_3_15 || @@ -740,12 +772,22 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, dc->ctx->dce_version == DCN_VERSION_3_51 || dc->ctx->dce_version == DCN_VERSION_3_6 || dc->ctx->dce_version == DCN_VERSION_3_16) - hdcp_work[i].hdcp.config.psp.caps.dtm_v3_supported = 1; - hdcp_work[i].hdcp.config.ddc.handle = dc_get_link_at_index(dc, i); - hdcp_work[i].hdcp.config.ddc.funcs.write_i2c = lp_write_i2c; - hdcp_work[i].hdcp.config.ddc.funcs.read_i2c = lp_read_i2c; - hdcp_work[i].hdcp.config.ddc.funcs.write_dpcd = lp_write_dpcd; - hdcp_work[i].hdcp.config.ddc.funcs.read_dpcd = lp_read_dpcd; + config->psp.caps.dtm_v3_supported = 1; + config->ddc.handle = dc_get_link_at_index(dc, i); + + ddc_funcs->write_i2c = lp_write_i2c; + ddc_funcs->read_i2c = lp_read_i2c; + ddc_funcs->write_dpcd = lp_write_dpcd; + ddc_funcs->read_dpcd = lp_read_dpcd; + + config->debug.lc_enable_sw_fallback = dc->debug.hdcp_lc_enable_sw_fallback; + if (dc->caps.fused_io_supported || dc->debug.hdcp_lc_force_fw_enable) { + ddc_funcs->atomic_write_poll_read_i2c = lp_atomic_write_poll_read_i2c; + ddc_funcs->atomic_write_poll_read_aux = lp_atomic_write_poll_read_aux; + } else { + ddc_funcs->atomic_write_poll_read_i2c = NULL; + ddc_funcs->atomic_write_poll_read_aux = NULL; + } memset(hdcp_work[i].aconnector, 0, sizeof(struct amdgpu_dm_connector *) * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 1395a748d726..d4395b92fb85 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -630,6 +630,19 @@ bool dm_helpers_submit_i2c( return result; } +bool dm_helpers_execute_fused_io( + struct dc_context *ctx, + struct dc_link *link, + union dmub_rb_cmd *commands, + uint8_t count, + uint32_t 
timeout_us +) +{ + struct amdgpu_device *dev = ctx->driver_context; + + return amdgpu_dm_execute_fused_io(dev, link, commands, count, timeout_us); +} + static bool execute_synaptics_rc_command(struct drm_dp_aux *aux, bool is_write_cmd, unsigned char cmd, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 7ceedf626d23..d19aea595722 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -51,6 +51,9 @@ #define PEAK_FACTOR_X1000 1006 +/* + * This function handles both native AUX and I2C-Over-AUX transactions. + */ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { @@ -87,15 +90,25 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, if (adev->dm.aux_hpd_discon_quirk) { if (msg->address == DP_SIDEBAND_MSG_DOWN_REQ_BASE && operation_result == AUX_RET_ERROR_HPD_DISCON) { - result = 0; + result = msg->size; operation_result = AUX_RET_SUCCESS; } } - if (payload.write && result >= 0) - result = msg->size; + /* + * result equals to 0 includes the cases of AUX_DEFER/I2C_DEFER + */ + if (payload.write && result >= 0) { + if (result) { + /*one byte indicating partially written bytes. Force 0 to retry*/ + drm_info(adev_to_drm(adev), "amdgpu: AUX partially written\n"); + result = 0; + } else if (!payload.reply[0]) + /*I2C_ACK|AUX_ACK*/ + result = msg->size; + } - if (result < 0) + if (result < 0) { switch (operation_result) { case AUX_RET_SUCCESS: break; @@ -114,6 +127,13 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, break; } + drm_info(adev_to_drm(adev), "amdgpu: DP AUX transfer fail:%d\n", operation_result); + } + + if (payload.reply[0]) + drm_info(adev_to_drm(adev), "amdgpu: AUX reply command not ACK: 0x%02x.", + payload.reply[0]); + return result; } @@ -1713,16 +1733,17 @@ static bool is_dsc_common_config_possible(struct dc_stream_state *stream, struct dc_dsc_bw_range *bw_range) { struct dc_dsc_policy dsc_policy = {0}; + bool is_dsc_possible; dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link)); - dc_dsc_compute_bandwidth_range(stream->sink->ctx->dc->res_pool->dscs[0], - stream->sink->ctx->dc->debug.dsc_min_slice_height_override, - dsc_policy.min_target_bpp * 16, - dsc_policy.max_target_bpp * 16, - &stream->sink->dsc_caps.dsc_dec_caps, - &stream->timing, dc_link_get_highest_encoding_format(stream->link), bw_range); - - return bw_range->max_target_bpp_x16 && bw_range->min_target_bpp_x16; + is_dsc_possible = dc_dsc_compute_bandwidth_range(stream->sink->ctx->dc->res_pool->dscs[0], + stream->sink->ctx->dc->debug.dsc_min_slice_height_override, + dsc_policy.min_target_bpp * 16, + dsc_policy.max_target_bpp * 16, + &stream->sink->dsc_caps.dsc_dec_caps, + &stream->timing, dc_link_get_highest_encoding_format(stream->link), bw_range); + + return is_dsc_possible; } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 3e0f45f1711c..b7c6e8d13435 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -948,13 +948,13 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane, adev = amdgpu_ttm_adev(rbo->tbo.bdev); r = amdgpu_bo_reserve(rbo, true); if (r) { - dev_err(adev->dev, "fail to reserve bo (%d)\n", r); + drm_err(adev_to_drm(adev), "fail to 
reserve bo (%d)\n", r); return r; } r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1); if (r) { - dev_err(adev->dev, "reserving fence slot failed (%d)\n", r); + drm_err(adev_to_drm(adev), "reserving fence slot failed (%d)\n", r); goto error_unlock; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index e140b7a04d72..f984cb0cb889 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -87,14 +87,6 @@ void amdgpu_dm_set_psr_caps(struct dc_link *link) link->psr_settings.psr_feature_enabled = true; } - - DRM_INFO("PSR support %d, DC PSR ver %d, sink PSR ver %d DPCD caps 0x%x su_y_granularity %d\n", - link->psr_settings.psr_feature_enabled, - link->psr_settings.psr_version, - link->dpcd_caps.psr_info.psr_version, - link->dpcd_caps.psr_info.psr_dpcd_caps.raw, - link->dpcd_caps.psr_info.psr2_su_y_granularity_cap); - } /* diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c index 0d5fefb0f591..d9527c05fc87 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c @@ -102,13 +102,13 @@ static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector r = amdgpu_bo_reserve(rbo, true); if (r) { - dev_err(adev->dev, "fail to reserve bo (%d)\n", r); + drm_err(adev_to_drm(adev), "fail to reserve bo (%d)\n", r); return r; } r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1); if (r) { - dev_err(adev->dev, "reserving fence slot failed (%d)\n", r); + drm_err(adev_to_drm(adev), "reserving fence slot failed (%d)\n", r); goto error_unlock; } diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index 3e1f5b689718..3c9ecea7eebc 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -53,31 +53,30 @@ DC_LIBS += hdcp ifdef CONFIG_DRM_AMD_DC_FP DC_LIBS += sspl -DC_SPL_TRANS += dc_spl_translate.o +AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/, dc_spl_translate.o) endif AMD_DC = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/dc/,$(DC_LIBS))) include $(AMD_DC) -DISPLAY_CORE = dc.o dc_stat.o dc_resource.o dc_hw_sequencer.o dc_sink.o \ -dc_surface.o dc_debug.o dc_stream.o dc_link_enc_cfg.o dc_link_exports.o dc_state.o +FILES = +FILES += dc_dmub_srv.o +FILES += dc_edid_parser.o +FILES += dc_fused_io.o +FILES += dc_helper.o +FILES += core/dc.o +FILES += core/dc_debug.o +FILES += core/dc_hw_sequencer.o +FILES += core/dc_link_enc_cfg.o +FILES += core/dc_link_exports.o +FILES += core/dc_resource.o +FILES += core/dc_sink.o +FILES += core/dc_stat.o +FILES += core/dc_state.o +FILES += core/dc_stream.o +FILES += core/dc_surface.o +FILES += core/dc_vm_helper.o + +AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/, $(FILES)) -DISPLAY_CORE += dc_vm_helper.o - -AMD_DISPLAY_CORE = $(addprefix $(AMDDALPATH)/dc/core/,$(DISPLAY_CORE)) - -AMD_DM_REG_UPDATE = $(addprefix $(AMDDALPATH)/dc/,dc_helper.o) - -AMD_DC_SPL_TRANS = $(addprefix $(AMDDALPATH)/dc/,$(DC_SPL_TRANS)) - -AMD_DISPLAY_FILES += $(AMD_DISPLAY_CORE) -AMD_DISPLAY_FILES += $(AMD_DM_REG_UPDATE) - -DC_DMUB += dc_dmub_srv.o -DC_EDID += dc_edid_parser.o -AMD_DISPLAY_DMUB = $(addprefix $(AMDDALPATH)/dc/,$(DC_DMUB)) -AMD_DISPLAY_EDID = $(addprefix $(AMDDALPATH)/dc/,$(DC_EDID)) -AMD_DISPLAY_FILES += $(AMD_DISPLAY_DMUB) $(AMD_DISPLAY_EDID) - -AMD_DISPLAY_FILES += $(AMD_DC_SPL_TRANS) 
diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c index 88d3f9d7dd55..452206b5095e 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c @@ -51,8 +51,6 @@ static inline unsigned long long complete_integer_division_u64( { unsigned long long result; - ASSERT(divisor); - result = div64_u64_rem(dividend, divisor, remainder); return result; @@ -213,9 +211,6 @@ struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg) * @note * Good idea to use Newton's method */ - - ASSERT(arg.value); - return dc_fixpt_from_fraction( dc_fixpt_one.value, arg.value); diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index 3bacf470f7c5..67f08495b7e6 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -2384,10 +2384,10 @@ static enum bp_result get_integrated_info_v8( } /* - * get_integrated_info_v8 + * get_integrated_info_v9 * * @brief - * Get V8 integrated BIOS information + * Get V9 integrated BIOS information * * @param * bios_parser *bp - [in]BIOS parser handler to get master data table diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c index 19897fa52e7e..d82a52319088 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c @@ -142,17 +142,3 @@ int rv1_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_di return actual_dispclk_set_mhz * 1000; } - -int rv1_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr) -{ - int actual_dprefclk_set_mhz = -1; - - actual_dprefclk_set_mhz = rv1_vbios_smu_send_msg_with_param( - clk_mgr, - VBIOSSMC_MSG_SetDprefclkFreq, - khz_to_mhz_ceil(clk_mgr->base.dprefclk_khz)); - - /* TODO: add code for programing DP DTO, currently this is down by command table */ - - return actual_dprefclk_set_mhz * 1000; -} diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h index 083cb3158859..81d7c912549c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h @@ -27,6 +27,5 @@ #define DAL_DC_DCN10_RV1_CLK_MGR_VBIOS_SMU_H_ int rv1_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz); -int rv1_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr); #endif /* DAL_DC_DCN10_RV1_CLK_MGR_VBIOS_SMU_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c index 23b390245b5d..5a633333dbb5 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c @@ -164,20 +164,6 @@ int rn_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dis return actual_dispclk_set_mhz * 1000; } -int rn_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr) -{ - int actual_dprefclk_set_mhz = -1; - - actual_dprefclk_set_mhz = rn_vbios_smu_send_msg_with_param( - clk_mgr, - VBIOSSMC_MSG_SetDprefclkFreq, - khz_to_mhz_ceil(clk_mgr->base.dprefclk_khz)); - - /* TODO: add code for programing DP DTO, currently this is down 
by command table */ - - return actual_dprefclk_set_mhz * 1000; -} - int rn_vbios_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz) { int actual_dcfclk_set_mhz = -1; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h index 1ce19d875358..f76fad87f0e1 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h @@ -30,7 +30,6 @@ enum dcn_pwr_state; int rn_vbios_smu_get_smu_version(struct clk_mgr_internal *clk_mgr); int rn_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz); -int rn_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr); int rn_vbios_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz); int rn_vbios_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz); void rn_vbios_smu_set_phyclk(struct clk_mgr_internal *clk_mgr, int requested_phyclk_khz); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c index 2d14346b680e..478b4d6a3544 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c @@ -49,12 +49,9 @@ static const struct IP_BASE MP0_BASE = { { { { 0x00016000, 0x00DC0000, 0x00E0000 { { 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0 } } } }; -static const struct IP_BASE NBIO_BASE = { { { { 0x00000000, 0x00000014, 0x00000D20, 0x00010400, 0x0241B000, 0x04040000 } }, - { { 0, 0, 0, 0, 0, 0 } }, - { { 0, 0, 0, 0, 0, 0 } }, - { { 0, 0, 0, 0, 0, 0 } }, - { { 0, 0, 0, 0, 0, 0 } }, - { { 0, 0, 0, 0, 0, 0 } } } }; + +#define CTX clk_mgr->base.ctx +#define IND_REG(offset) offset #define regBIF_BX_PF2_RSMU_INDEX 0x0000 #define regBIF_BX_PF2_RSMU_INDEX_BASE_IDX 1 @@ -67,9 +64,6 @@ static const struct IP_BASE NBIO_BASE = { { { { 0x00000000, 0x00000014, 0x00000D #define FN(reg_name, field) \ FD(reg_name##__##field) -#define REG_NBIO(reg_name) \ - (NBIO_BASE.instance[0].segment[regBIF_BX_PF2_ ## reg_name ## _BASE_IDX] + regBIF_BX_PF2_ ## reg_name) - #undef DC_LOGGER #define DC_LOGGER \ CTX->logger @@ -77,6 +71,13 @@ static const struct IP_BASE NBIO_BASE = { { { { 0x00000000, 0x00000014, 0x00000D #define mmMP1_C2PMSG_3 0x3B1050C +#define reg__MP1_C2PMSG_3_MASK (0xFFFFFFFF) +#define reg__MP1_C2PMSG_3__SHIFT (0) + + +#define data_reg_name__MP1_C2PMSG_3_MASK (0xFFFFFFFF) +#define data_reg_name__MP1_C2PMSG_3__SHIFT (0) + #define VBIOSSMC_MSG_TestMessage 0x01 ///< To check if PMFW is alive and responding. 
Requirement specified by PMFW team #define VBIOSSMC_MSG_GetPmfwVersion 0x02 ///< Get PMFW version #define VBIOSSMC_MSG_Spare0 0x03 ///< Spare0 @@ -153,12 +154,10 @@ static int dcn315_smu_send_msg_with_param( for (i = 0; i < SMU_REGISTER_WRITE_RETRY_COUNT; i++) { /* Trigger the message transaction by writing the message ID */ - generic_write_indirect_reg(CTX, - REG_NBIO(RSMU_INDEX), REG_NBIO(RSMU_DATA), - mmMP1_C2PMSG_3, msg_id); - read_back_data = generic_read_indirect_reg(CTX, - REG_NBIO(RSMU_INDEX), REG_NBIO(RSMU_DATA), - mmMP1_C2PMSG_3); + IX_REG_SET_SYNC(mmMP1_C2PMSG_3, 0, + MP1_C2PMSG_3, msg_id); + IX_REG_GET_SYNC(mmMP1_C2PMSG_3, + MP1_C2PMSG_3, &read_back_data); if (read_back_data == msg_id) break; udelay(2); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c index 6a6ae618650b..4607eff07253 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c @@ -65,6 +65,7 @@ #define mmCLK1_CLK5_ALLOW_DS 0x16EB1 #define mmCLK5_spll_field_8 0x1B04B +#define mmCLK6_spll_field_8 0x1B24B #define mmDENTIST_DISPCLK_CNTL 0x0124 #define regDENTIST_DISPCLK_CNTL 0x0064 #define regDENTIST_DISPCLK_CNTL_BASE_IDX 1 diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 142de8938d7c..bb1ac12a2b09 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -90,6 +90,7 @@ #define mmCLK1_CLK5_ALLOW_DS 0x16EB1 #define mmCLK5_spll_field_8 0x1B24B +#define mmCLK6_spll_field_8 0x1B24B #define mmDENTIST_DISPCLK_CNTL 0x0124 #define regDENTIST_DISPCLK_CNTL 0x0064 #define regDENTIST_DISPCLK_CNTL_BASE_IDX 1 @@ -116,6 +117,7 @@ #define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER_MASK 0x7F000000L #define CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L +#define CLK6_spll_field_8__spll_ssc_en_MASK 0x00002000L #define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0 #undef FN @@ -596,7 +598,11 @@ static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base) uint32_t ssc_enable; - ssc_enable = REG_READ(CLK5_spll_field_8) & CLK5_spll_field_8__spll_ssc_en_MASK; + if (clk_mgr_base->ctx->dce_version == DCN_VERSION_3_51) { + ssc_enable = REG_READ(CLK6_spll_field_8) & CLK6_spll_field_8__spll_ssc_en_MASK; + } else { + ssc_enable = REG_READ(CLK5_spll_field_8) & CLK5_spll_field_8__spll_ssc_en_MASK; + } return ssc_enable != 0; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c index f6f0e6a33001..604d256cb47a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c @@ -84,8 +84,8 @@ #define VBIOSSMC_MSG_AllowZstatesEntry 0x15 #define VBIOSSMC_MSG_DisallowZstatesEntry 0x16 #define VBIOSSMC_MSG_SetDtbClk 0x17 -#define VBIOSSMC_MSG_DispPsrEntry 0x18 ///< Display PSR entry, DMU -#define VBIOSSMC_MSG_DispPsrExit 0x19 ///< Display PSR exit, DMU +#define VBIOSSMC_MSG_DispIPS2Entry 0x18 ///< Display IPS2 entry, DMU +#define VBIOSSMC_MSG_DispIPS2Exit 0x19 ///< Display IPS2 exit, DMU #define VBIOSSMC_MSG_DisableLSdma 0x1A ///< Disable LSDMA; only sent by VBIOS #define VBIOSSMC_MSG_DpControllerPhyStatus 0x1B ///< Inform PMFW about the pre conditions for turning SLDO2 on/off . 
bit[0]==1 precondition is met, bit[1-2] are for DPPHY number #define VBIOSSMC_MSG_QueryIPS2Support 0x1C ///< Return 1: support; else not supported @@ -475,7 +475,7 @@ int dcn35_smu_exit_low_power_state(struct clk_mgr_internal *clk_mgr) retv = dcn35_smu_send_msg_with_param( clk_mgr, - VBIOSSMC_MSG_DispPsrExit, + VBIOSSMC_MSG_DispIPS2Exit, 0); smu_print("%s: smu_exit_low_power_state return = %d\n", __func__, retv); return retv; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 28d1353f403d..afd298e69d4e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -37,6 +37,7 @@ #include "dc_state.h" #include "dc_state_priv.h" #include "dc_plane_priv.h" +#include "dc_stream_priv.h" #include "gpio_service_interface.h" #include "clk_mgr.h" @@ -1192,6 +1193,12 @@ static void apply_ctx_interdependent_lock(struct dc *dc, static void dc_update_visual_confirm_color(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx) { + if (dc->debug.visual_confirm & VISUAL_CONFIRM_EXPLICIT) { + memcpy(&pipe_ctx->visual_confirm_color, &pipe_ctx->plane_state->visual_confirm_color, + sizeof(pipe_ctx->visual_confirm_color)); + return; + } + if (dc->ctx->dce_version >= DCN_VERSION_1_0) { memset(&pipe_ctx->visual_confirm_color, 0, sizeof(struct tg_color)); @@ -1225,6 +1232,51 @@ static void dc_update_visual_confirm_color(struct dc *dc, struct dc_state *conte } } +void dc_get_visual_confirm_for_stream( + struct dc *dc, + struct dc_stream_state *stream_state, + struct tg_color *color) +{ + struct dc_stream_status *stream_status = dc_stream_get_status(stream_state); + struct pipe_ctx *pipe_ctx; + int i; + struct dc_plane_state *plane_state = NULL; + + if (!stream_status) + return; + + switch (dc->debug.visual_confirm) { + case VISUAL_CONFIRM_DISABLE: + return; + case VISUAL_CONFIRM_PSR: + case VISUAL_CONFIRM_FAMS: + pipe_ctx = dc_stream_get_pipe_ctx(stream_state); + if (!pipe_ctx) + return; + dc_dmub_srv_get_visual_confirm_color_cmd(dc, pipe_ctx); + memcpy(color, &dc->ctx->dmub_srv->dmub->visual_confirm_color, sizeof(struct tg_color)); + return; + + default: + /* find plane with highest layer_index */ + for (i = 0; i < stream_status->plane_count; i++) { + if (stream_status->plane_states[i]->visible) + plane_state = stream_status->plane_states[i]; + } + if (!plane_state) + return; + /* find pipe that contains plane with highest layer index */ + for (i = 0; i < MAX_PIPES; i++) { + struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state == plane_state) { + memcpy(color, &pipe->visual_confirm_color, sizeof(struct tg_color)); + return; + } + } + } +} + static void disable_dangling_plane(struct dc *dc, struct dc_state *context) { int i, j; @@ -2053,6 +2105,18 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c dc->hwss.enable_accelerated_mode(dc, context); } + if (dc->hwseq->funcs.wait_for_pipe_update_if_needed) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + //Only delay otg master for a given config + if (resource_is_pipe_type(pipe, OTG_MASTER)) { + //dc_commit_state_no_check is always a full update + dc->hwseq->funcs.wait_for_pipe_update_if_needed(dc, pipe, false); + break; + } + } + } + if (context->stream_count > get_seamless_boot_stream_count(context) || context->stream_count == 0) dc->hwss.prepare_bandwidth(dc, context); @@ -2117,6 +2181,14 @@ static enum dc_status dc_commit_state_no_check(struct dc 
*dc, struct dc_state *c if (dc->hwss.program_front_end_for_ctx) { dc->hwss.interdependent_update_lock(dc, context, true); dc->hwss.program_front_end_for_ctx(dc, context); + + if (dc->hwseq->funcs.set_wait_for_update_needed_for_pipe) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + dc->hwseq->funcs.set_wait_for_update_needed_for_pipe(dc, pipe); + } + } + dc->hwss.interdependent_update_lock(dc, context, false); dc->hwss.post_unlock_program_front_end(dc, context); } @@ -2815,7 +2887,7 @@ static enum surface_update_type check_update_surfaces_for_stream( int i; enum surface_update_type overall_type = UPDATE_TYPE_FAST; - if (dc->idle_optimizations_allowed) + if (dc->idle_optimizations_allowed || dc_can_clear_cursor_limit(dc)) overall_type = UPDATE_TYPE_FULL; if (stream_status == NULL || stream_status->plane_count != surface_count) @@ -3219,7 +3291,7 @@ static void copy_stream_update_to_stream(struct dc *dc, if (dsc_validate_context) { stream->timing.dsc_cfg = *update->dsc_config; stream->timing.flags.DSC = enable_dsc; - if (!dc->res_pool->funcs->validate_bandwidth(dc, dsc_validate_context, true)) { + if (dc->res_pool->funcs->validate_bandwidth(dc, dsc_validate_context, true) != DC_OK) { stream->timing.dsc_cfg = old_dsc_cfg; stream->timing.flags.DSC = old_dsc_enabled; update->dsc_config = NULL; @@ -3444,7 +3516,7 @@ static bool update_planes_and_stream_state(struct dc *dc, } if (update_type == UPDATE_TYPE_FULL) { - if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) { + if (dc->res_pool->funcs->validate_bandwidth(dc, context, false) != DC_OK) { BREAK_TO_DEBUGGER(); goto fail; } @@ -3998,6 +4070,7 @@ static void commit_planes_for_stream(struct dc *dc, &context->res_ctx, stream); ASSERT(top_pipe_to_program != NULL); + for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; @@ -4048,6 +4121,9 @@ static void commit_planes_for_stream(struct dc *dc, dc->hwss.wait_for_dcc_meta_propagation(dc, top_pipe_to_program); } + if (dc->hwseq->funcs.wait_for_pipe_update_if_needed) + dc->hwseq->funcs.wait_for_pipe_update_if_needed(dc, top_pipe_to_program, update_type == UPDATE_TYPE_FAST); + if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { if (dc->hwss.subvp_pipe_control_lock) dc->hwss.subvp_pipe_control_lock(dc, context, true, should_lock_all_pipes, NULL, subvp_prev_use); @@ -4184,6 +4260,16 @@ static void commit_planes_for_stream(struct dc *dc, } if (dc->hwss.program_front_end_for_ctx && update_type != UPDATE_TYPE_FAST) { dc->hwss.program_front_end_for_ctx(dc, context); + + //Pipe busy until some frame and line # + if (dc->hwseq->funcs.set_wait_for_update_needed_for_pipe && update_type == UPDATE_TYPE_FULL) { + for (j = 0; j < dc->res_pool->pipe_count; j++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; + + dc->hwseq->funcs.set_wait_for_update_needed_for_pipe(dc, pipe_ctx); + } + } + if (dc->debug.validate_dml_output) { for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *cur_pipe = &context->res_ctx.pipe_ctx[i]; @@ -4523,7 +4609,7 @@ static struct dc_state *create_minimal_transition_state(struct dc *dc, backup_and_set_minimal_pipe_split_policy(dc, base_context, policy); /* commit minimal state */ - if (dc->res_pool->funcs->validate_bandwidth(dc, minimal_transition_context, false)) { + if (dc->res_pool->funcs->validate_bandwidth(dc, minimal_transition_context, false) == DC_OK) { /* prevent underflow and corruption when reconfiguring pipes */ 
force_vsync_flip_in_minimal_transition_context(minimal_transition_context); } else { @@ -4958,6 +5044,9 @@ static bool full_update_required(struct dc *dc, if (dc->idle_optimizations_allowed) return true; + if (dc_can_clear_cursor_limit(dc)) + return true; + return false; } @@ -5043,7 +5132,7 @@ static bool update_planes_and_stream_v1(struct dc *dc, copy_stream_update_to_stream(dc, context, stream, stream_update); if (update_type >= UPDATE_TYPE_FULL) { - if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) { + if (dc->res_pool->funcs->validate_bandwidth(dc, context, false) != DC_OK) { DC_ERROR("Mode validation failed for stream update!\n"); dc_state_release(context); return false; @@ -6187,15 +6276,22 @@ bool dc_abm_save_restore( void dc_query_current_properties(struct dc *dc, struct dc_current_properties *properties) { unsigned int i; - bool subvp_sw_cursor_req = false; + unsigned int max_cursor_size = dc->caps.max_cursor_size; + unsigned int stream_cursor_size; - for (i = 0; i < dc->current_state->stream_count; i++) { - if (check_subvp_sw_cursor_fallback_req(dc, dc->current_state->streams[i]) && !dc->current_state->streams[i]->hw_cursor_req) { - subvp_sw_cursor_req = true; - break; + if (dc->debug.allow_sw_cursor_fallback && dc->res_pool->funcs->get_max_hw_cursor_size) { + for (i = 0; i < dc->current_state->stream_count; i++) { + stream_cursor_size = dc->res_pool->funcs->get_max_hw_cursor_size(dc, + dc->current_state, + dc->current_state->streams[i]); + + if (stream_cursor_size < max_cursor_size) { + max_cursor_size = stream_cursor_size; + } } } - properties->cursor_size_limit = subvp_sw_cursor_req ? 64 : dc->caps.max_cursor_size; + + properties->cursor_size_limit = max_cursor_size; } /** @@ -6261,3 +6357,27 @@ unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context) else return 0; } + +bool dc_is_cursor_limit_pending(struct dc *dc) +{ + uint32_t i; + + for (i = 0; i < dc->current_state->stream_count; i++) { + if (dc_stream_is_cursor_limit_pending(dc, dc->current_state->streams[i])) + return true; + } + + return false; +} + +bool dc_can_clear_cursor_limit(struct dc *dc) +{ + uint32_t i; + + for (i = 0; i < dc->current_state->stream_count; i++) { + if (dc_state_can_clear_stream_cursor_subvp_limit(dc->current_state->streams[i], dc->current_state)) + return true; + } + + return false; +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c index 650e89825968..7551d0a3fe82 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c @@ -266,6 +266,8 @@ char *dc_status_to_str(enum dc_status status) return "Fail dp payload allocation"; case DC_FAIL_DP_LINK_BANDWIDTH: return "Insufficient DP link bandwidth"; + case DC_FAIL_HW_CURSOR_SUPPORT: + return "HW Cursor not supported"; case DC_ERROR_UNEXPECTED: return "Unexpected error"; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 55b32dfbfdd6..7014b8d000bb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -697,7 +697,7 @@ void get_fams2_visual_confirm_color( void hwss_build_fast_sequence(struct dc *dc, struct dc_dmub_cmd *dc_dmub_cmd, unsigned int dmub_cmd_count, - struct block_sequence block_sequence[], + struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE], unsigned int *num_steps, struct pipe_ctx *pipe_ctx, struct dc_stream_status 
*stream_status, @@ -896,7 +896,7 @@ void hwss_build_fast_sequence(struct dc *dc, } void hwss_execute_sequence(struct dc *dc, - struct block_sequence block_sequence[], + struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE], int num_steps) { unsigned int i; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 313a32248cd7..13559fe56062 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1342,32 +1342,6 @@ static void calculate_inits_and_viewports(struct pipe_ctx *pipe_ctx) data->viewport_c.y += src.y / vpc_div; } -static bool is_subvp_high_refresh_candidate(struct dc_stream_state *stream) -{ - uint32_t refresh_rate; - struct dc *dc = stream->ctx->dc; - - refresh_rate = (stream->timing.pix_clk_100hz * (uint64_t)100 + - stream->timing.v_total * stream->timing.h_total - (uint64_t)1); - refresh_rate = div_u64(refresh_rate, stream->timing.v_total); - refresh_rate = div_u64(refresh_rate, stream->timing.h_total); - - /* If there's any stream that fits the SubVP high refresh criteria, - * we must return true. This is because cursor updates are asynchronous - * with full updates, so we could transition into a SubVP config and - * remain in HW cursor mode if there's no cursor update which will - * then cause corruption. - */ - if ((refresh_rate >= 120 && refresh_rate <= 175 && - stream->timing.v_addressable >= 1080 && - stream->timing.v_addressable <= 2160) && - (dc->current_state->stream_count > 1 || - (dc->current_state->stream_count == 1 && !stream->allow_freesync))) - return true; - - return false; -} - static enum controller_dp_test_pattern convert_dp_to_controller_test_pattern( enum dp_test_pattern test_pattern) { @@ -4259,6 +4233,11 @@ enum dc_status dc_validate_with_context(struct dc *dc, } } + /* clear subvp cursor limitations */ + for (i = 0; i < context->stream_count; i++) { + dc_state_set_stream_subvp_cursor_limit(context->streams[i], context, false); + } + res = dc_validate_global_state(dc, context, fast_validate); /* calculate pixel rate divider after deciding pxiel clock & odm combine */ @@ -4385,8 +4364,7 @@ enum dc_status dc_validate_global_state( result = resource_build_scaling_params_for_context(dc, new_ctx); if (result == DC_OK) - if (!dc->res_pool->funcs->validate_bandwidth(dc, new_ctx, fast_validate)) - result = DC_FAIL_BANDWIDTH_VALIDATE; + result = dc->res_pool->funcs->validate_bandwidth(dc, new_ctx, fast_validate); return result; } @@ -5538,20 +5516,6 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc, return DC_OK; } -bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_state *stream) -{ - if (!dc->debug.disable_subvp_high_refresh && is_subvp_high_refresh_candidate(stream)) - return true; - if (dc->current_state->stream_count == 1 && stream->timing.v_addressable >= 2880 && - ((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120) - return true; - else if (dc->current_state->stream_count > 1 && stream->timing.v_addressable >= 1080 && - ((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120) - return true; - - return false; -} - struct dscl_prog_data *resource_get_dscl_prog_data(struct pipe_ctx *pipe_ctx) { return &pipe_ctx->plane_res.scl_data.dscl_prog_data; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index 
1b2cce127981..4db7383720fd 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -22,6 +22,7 @@ * Authors: AMD * */ +#include "dc_types.h" #include "core_types.h" #include "core_status.h" #include "dc_state.h" @@ -812,8 +813,12 @@ enum dc_status dc_state_add_phantom_stream(const struct dc *dc, if (phantom_stream_status) { phantom_stream_status->mall_stream_config.type = SUBVP_PHANTOM; phantom_stream_status->mall_stream_config.paired_stream = main_stream; + phantom_stream_status->mall_stream_config.subvp_limit_cursor_size = false; + phantom_stream_status->mall_stream_config.cursor_size_limit_subvp = false; } + dc_state_set_stream_subvp_cursor_limit(main_stream, state, true); + return res; } @@ -939,13 +944,20 @@ void dc_state_release_phantom_streams_and_planes( const struct dc *dc, struct dc_state *state) { + unsigned int phantom_count; + struct dc_stream_state *phantom_streams[MAX_PHANTOM_PIPES]; + struct dc_plane_state *phantom_planes[MAX_PHANTOM_PIPES]; int i; - for (i = 0; i < state->phantom_stream_count; i++) - dc_state_release_phantom_stream(dc, state, state->phantom_streams[i]); + phantom_count = state->phantom_stream_count; + memcpy(phantom_streams, state->phantom_streams, sizeof(struct dc_stream_state *) * MAX_PHANTOM_PIPES); + for (i = 0; i < phantom_count; i++) + dc_state_release_phantom_stream(dc, state, phantom_streams[i]); - for (i = 0; i < state->phantom_plane_count; i++) - dc_state_release_phantom_plane(dc, state, state->phantom_planes[i]); + phantom_count = state->phantom_plane_count; + memcpy(phantom_planes, state->phantom_planes, sizeof(struct dc_plane_state *) * MAX_PHANTOM_PIPES); + for (i = 0; i < phantom_count; i++) + dc_state_release_phantom_plane(dc, state, phantom_planes[i]); } struct dc_stream_state *dc_state_get_stream_from_id(const struct dc_state *state, unsigned int id) @@ -977,3 +989,94 @@ bool dc_state_is_fams2_in_use( return is_fams2_in_use; } + +void dc_state_set_stream_subvp_cursor_limit(const struct dc_stream_state *stream, + struct dc_state *state, + bool limit) +{ + struct dc_stream_status *stream_status; + + stream_status = dc_state_get_stream_status(state, stream); + + if (stream_status) { + stream_status->mall_stream_config.subvp_limit_cursor_size = limit; + } +} + +bool dc_state_get_stream_subvp_cursor_limit(const struct dc_stream_state *stream, + struct dc_state *state) +{ + bool limit = false; + + struct dc_stream_status *stream_status; + + stream_status = dc_state_get_stream_status(state, stream); + + if (stream_status) { + limit = stream_status->mall_stream_config.subvp_limit_cursor_size; + } + + return limit; +} + +void dc_state_set_stream_cursor_subvp_limit(const struct dc_stream_state *stream, + struct dc_state *state, + bool limit) +{ + struct dc_stream_status *stream_status; + + stream_status = dc_state_get_stream_status(state, stream); + + if (stream_status) { + stream_status->mall_stream_config.cursor_size_limit_subvp = limit; + } +} + +bool dc_state_get_stream_cursor_subvp_limit(const struct dc_stream_state *stream, + struct dc_state *state) +{ + bool limit = false; + + struct dc_stream_status *stream_status; + + stream_status = dc_state_get_stream_status(state, stream); + + if (stream_status) { + limit = stream_status->mall_stream_config.cursor_size_limit_subvp; + } + + return limit; +} + +bool dc_state_can_clear_stream_cursor_subvp_limit(const struct dc_stream_state *stream, + struct dc_state *state) +{ + bool can_clear_limit = false; + + struct dc_stream_status 
*stream_status; + + stream_status = dc_state_get_stream_status(state, stream); + + if (stream_status) { + can_clear_limit = dc_state_get_stream_cursor_subvp_limit(stream, state) && + (stream_status->mall_stream_config.type == SUBVP_PHANTOM || + stream->hw_cursor_req || + !stream_status->mall_stream_config.subvp_limit_cursor_size || + !stream->cursor_position.enable || + dc_stream_check_cursor_attributes(stream, state, &stream->cursor_attributes)); + } + + return can_clear_limit; +} + +bool dc_state_is_subvp_in_use(struct dc_state *state) +{ + uint32_t i; + + for (i = 0; i < state->stream_count; i++) { + if (dc_state_get_stream_subvp_type(state, state->streams[i]) != SUBVP_NONE) + return true; + } + + return false; +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 0478dd856d8c..b883fb24fa12 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -265,13 +265,16 @@ void program_cursor_attributes( } /* - * dc_stream_set_cursor_attributes() - Update cursor attributes and set cursor surface address + * dc_stream_check_cursor_attributes() - Check validitity of cursor attributes and surface address */ -bool dc_stream_set_cursor_attributes( - struct dc_stream_state *stream, +bool dc_stream_check_cursor_attributes( + const struct dc_stream_state *stream, + struct dc_state *state, const struct dc_cursor_attributes *attributes) { - struct dc *dc; + const struct dc *dc; + + unsigned int max_cursor_size; if (NULL == stream) { dm_error("DC: dc_stream is NULL!\n"); @@ -289,24 +292,38 @@ bool dc_stream_set_cursor_attributes( dc = stream->ctx->dc; - /* SubVP is not compatible with HW cursor larger than 64 x 64 x 4. - * Therefore, if cursor is greater than 64 x 64 x 4, fallback to SW cursor in the following case: - * 1. If the config is a candidate for SubVP high refresh (both single an dual display configs) - * 2. If not subvp high refresh, for single display cases, if resolution is >= 5K and refresh rate < 120hz - * 3. If not subvp high refresh, for multi display cases, if resolution is >= 4K and refresh rate < 120hz + /* SubVP is not compatible with HW cursor larger than what can fit in cursor SRAM. + * Therefore, if cursor is greater than this, fallback to SW cursor. 
*/ - if (dc->debug.allow_sw_cursor_fallback && - attributes->height * attributes->width * 4 > 16384 && - !stream->hw_cursor_req) { - if (check_subvp_sw_cursor_fallback_req(dc, stream)) + if (dc->debug.allow_sw_cursor_fallback && dc->res_pool->funcs->get_max_hw_cursor_size) { + max_cursor_size = dc->res_pool->funcs->get_max_hw_cursor_size(dc, state, stream); + max_cursor_size = max_cursor_size * max_cursor_size * 4; + + if (attributes->height * attributes->width * 4 > max_cursor_size) { return false; + } } - stream->cursor_attributes = *attributes; - return true; } +/* + * dc_stream_set_cursor_attributes() - Update cursor attributes and set cursor surface address + */ +bool dc_stream_set_cursor_attributes( + struct dc_stream_state *stream, + const struct dc_cursor_attributes *attributes) +{ + bool result = false; + + if (dc_stream_check_cursor_attributes(stream, stream->ctx->dc->current_state, attributes)) { + stream->cursor_attributes = *attributes; + result = true; + } + + return result; +} + bool dc_stream_program_cursor_attributes( struct dc_stream_state *stream, const struct dc_cursor_attributes *attributes) @@ -552,6 +569,14 @@ bool dc_stream_fc_disable_writeback(struct dc *dc, return true; } +/** + * dc_stream_remove_writeback() - Disables writeback and removes writeback info. + * @dc: Display core control structure. + * @stream: Display core stream state. + * @dwb_pipe_inst: Display writeback pipe. + * + * Return: returns true on success, false otherwise. + */ bool dc_stream_remove_writeback(struct dc *dc, struct dc_stream_state *stream, uint32_t dwb_pipe_inst) @@ -1109,3 +1134,26 @@ unsigned int dc_stream_get_max_flickerless_instant_vtotal_increase(struct dc_str return dc_stream_get_max_flickerless_instant_vtotal_delta(stream, is_gaming, false); } + +bool dc_stream_is_cursor_limit_pending(struct dc *dc, struct dc_stream_state *stream) +{ + bool is_limit_pending = false; + + if (dc->current_state) + is_limit_pending = dc_state_get_stream_cursor_subvp_limit(stream, dc->current_state); + + return is_limit_pending; +} + +bool dc_stream_can_clear_cursor_limit(struct dc *dc, struct dc_stream_state *stream) +{ + bool can_clear_limit = false; + + if (dc->current_state) + can_clear_limit = dc_state_get_stream_cursor_subvp_limit(stream, dc->current_state) && + (stream->hw_cursor_req || + !stream->cursor_position.enable || + dc_stream_check_cursor_attributes(stream, dc->current_state, &stream->cursor_attributes)); + + return can_clear_limit; +} diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7c2ee0526926..2230e36c4f12 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -53,7 +53,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.325" +#define DC_VER "3.2.331" /** * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC @@ -249,6 +249,7 @@ struct dc_caps { uint32_t i2c_speed_in_khz_hdcp; uint32_t dmdata_alloc_size; unsigned int max_cursor_size; + unsigned int max_buffered_cursor_size; unsigned int max_video_width; /* * max video plane width that can be safely assumed to be always @@ -282,6 +283,7 @@ struct dc_caps { bool edp_dsc_support; bool vbios_lttpr_aware; bool vbios_lttpr_enable; + bool fused_io_supported; uint32_t max_otg_num; uint32_t max_cab_allocation_bytes; uint32_t cache_line_size; @@ -447,6 +449,7 @@ struct dc_config { bool enable_windowed_mpo_odm; bool forceHBR2CP2520; // Used for switching between test 
patterns TPS4 and CP2520 uint32_t allow_edp_hotplug_detection; + bool skip_riommu_prefetch_wa; bool clamp_min_dcfclk; uint64_t vblank_alignment_dto_params; uint8_t vblank_alignment_max_frame_time_diff; @@ -496,6 +499,7 @@ enum visual_confirm { VISUAL_CONFIRM_HW_CURSOR = 20, VISUAL_CONFIRM_VABC = 21, VISUAL_CONFIRM_DCC = 22, + VISUAL_CONFIRM_EXPLICIT = 0x80000000, }; enum dc_psr_power_opts { @@ -902,6 +906,9 @@ struct dc_debug_options { bool voltage_align_fclk; bool disable_min_fclk; + bool hdcp_lc_force_fw_enable; + bool hdcp_lc_enable_sw_fallback; + bool disable_dfs_bypass; bool disable_dpp_power_gate; bool disable_hubp_power_gate; @@ -1418,6 +1425,171 @@ struct dc_scratch_space { struct dc_stream_state stream_state; }; +/* + * A link contains one or more sinks and their connected status. + * The currently active signal type (HDMI, DP-SST, DP-MST) is also reported. + */ + struct dc_link { + struct dc_sink *remote_sinks[MAX_SINKS_PER_LINK]; + unsigned int sink_count; + struct dc_sink *local_sink; + unsigned int link_index; + enum dc_connection_type type; + enum signal_type connector_signal; + enum dc_irq_source irq_source_hpd; + enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse */ + enum dc_irq_source irq_source_read_request;/* Read Request */ + + bool is_hpd_filter_disabled; + bool dp_ss_off; + + /** + * @link_state_valid: + * + * If there is no link and local sink, this variable should be set to + * false. Otherwise, it should be set to true; usually, the function + * core_link_enable_stream sets this field to true. + */ + bool link_state_valid; + bool aux_access_disabled; + bool sync_lt_in_progress; + bool skip_stream_reenable; + bool is_internal_display; + /** @todo Rename. Flag an endpoint as having a programmable mapping to a DIG encoder. */ + bool is_dig_mapping_flexible; + bool hpd_status; /* HPD status of link without physical HPD pin. */ + bool is_hpd_pending; /* Indicates a new received hpd */ + + /* USB4 DPIA links skip verifying link cap, instead performing the fallback method + * for every link training. This is incompatible with DP LL compliance automation, + * which expects the same link settings to be used every retry on a link loss. + * This flag is used to skip the fallback when link loss occurs during automation. + */ + bool skip_fallback_on_link_loss; + + bool edp_sink_present; + + struct dp_trace dp_trace; + + /* caps is the same as reported_link_cap. link_traing use + * reported_link_cap. Will clean up. TODO + */ + struct dc_link_settings reported_link_cap; + struct dc_link_settings verified_link_cap; + struct dc_link_settings cur_link_settings; + struct dc_lane_settings cur_lane_setting[LANE_COUNT_DP_MAX]; + struct dc_link_settings preferred_link_setting; + /* preferred_training_settings are override values that + * come from DM. DM is responsible for the memory + * management of the override pointers. + */ + struct dc_link_training_overrides preferred_training_settings; + struct dp_audio_test_data audio_test_data; + + uint8_t ddc_hw_inst; + + uint8_t hpd_src; + + uint8_t link_enc_hw_inst; + /* DIG link encoder ID. Used as index in link encoder resource pool. + * For links with fixed mapping to DIG, this is not changed after dc_link + * object creation. + */ + enum engine_id eng_id; + enum engine_id dpia_preferred_eng_id; + + bool test_pattern_enabled; + /* Pending/Current test pattern are only used to perform and track + * FIXED_VS retimer test pattern/lane adjustment override state. 
+ * Pending allows link HWSS to differentiate PHY vs non-PHY pattern, + * to perform specific lane adjust overrides before setting certain + * PHY test patterns. In cases when lane adjust and set test pattern + * calls are not performed atomically (i.e. performing link training), + * pending_test_pattern will be invalid or contain a non-PHY test pattern + * and current_test_pattern will contain required context for any future + * set pattern/set lane adjust to transition between override state(s). + * */ + enum dp_test_pattern current_test_pattern; + enum dp_test_pattern pending_test_pattern; + + union compliance_test_state compliance_test_state; + + void *priv; + + struct ddc_service *ddc; + + enum dp_panel_mode panel_mode; + bool aux_mode; + + /* Private to DC core */ + + const struct dc *dc; + + struct dc_context *ctx; + + struct panel_cntl *panel_cntl; + struct link_encoder *link_enc; + struct graphics_object_id link_id; + /* Endpoint type distinguishes display endpoints which do not have entries + * in the BIOS connector table from those that do. Helps when tracking link + * encoder to display endpoint assignments. + */ + enum display_endpoint_type ep_type; + union ddi_channel_mapping ddi_channel_mapping; + struct connector_device_tag_info device_tag; + struct dpcd_caps dpcd_caps; + uint32_t dongle_max_pix_clk; + unsigned short chip_caps; + unsigned int dpcd_sink_count; + struct hdcp_caps hdcp_caps; + enum edp_revision edp_revision; + union dpcd_sink_ext_caps dpcd_sink_ext_caps; + + struct psr_settings psr_settings; + struct replay_settings replay_settings; + + /* Drive settings read from integrated info table */ + struct dc_lane_settings bios_forced_drive_settings; + + /* Vendor specific LTTPR workaround variables */ + uint8_t vendor_specific_lttpr_link_rate_wa; + bool apply_vendor_specific_lttpr_link_rate_wa; + + /* MST record stream using this link */ + struct link_flags { + bool dp_keep_receiver_powered; + bool dp_skip_DID2; + bool dp_skip_reset_segment; + bool dp_skip_fs_144hz; + bool dp_mot_reset_segment; + /* Some USB4 docks do not handle turning off MST DSC once it has been enabled. */ + bool dpia_mst_dsc_always_on; + /* Forced DPIA into TBT3 compatibility mode. 
*/ + bool dpia_forced_tbt3_mode; + bool dongle_mode_timing_override; + bool blank_stream_on_ocs_change; + bool read_dpcd204h_on_irq_hpd; + bool force_dp_ffe_preset; + } wa_flags; + union dc_dp_ffe_preset forced_dp_ffe_preset; + struct link_mst_stream_allocation_table mst_stream_alloc_table; + + struct dc_link_status link_status; + struct dprx_states dprx_states; + + struct gpio *hpd_gpio; + enum dc_link_fec_state fec_state; + bool link_powered_externally; // Used to bypass hardware sequencing delays when panel is powered down forcibly + + struct dc_panel_config panel_config; + struct phy_state phy_state; + uint32_t phy_transition_bitmask; + // BW ALLOCATON USB4 ONLY + struct dc_dpia_bw_alloc dpia_bw_alloc_config; + bool skip_implict_edp_power_control; + enum backlight_control_type backlight_control_type; +}; + struct dc { struct dc_debug_options debug; struct dc_versions versions; @@ -1485,6 +1657,7 @@ struct dc { struct dc_scratch_space current_state; struct dc_scratch_space new_state; struct dc_stream_state temp_stream; // Used so we don't need to allocate stream on the stack + struct dc_link temp_link; bool pipes_to_unlock_first[MAX_PIPES]; /* Any of the pipes indicated here should be unlocked first */ } scratch; @@ -1651,170 +1824,6 @@ uint32_t dc_bandwidth_in_kbps_from_timing( const enum dc_link_encoding_format link_encoding); /* Link Interfaces */ -/* - * A link contains one or more sinks and their connected status. - * The currently active signal type (HDMI, DP-SST, DP-MST) is also reported. - */ -struct dc_link { - struct dc_sink *remote_sinks[MAX_SINKS_PER_LINK]; - unsigned int sink_count; - struct dc_sink *local_sink; - unsigned int link_index; - enum dc_connection_type type; - enum signal_type connector_signal; - enum dc_irq_source irq_source_hpd; - enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse */ - - bool is_hpd_filter_disabled; - bool dp_ss_off; - - /** - * @link_state_valid: - * - * If there is no link and local sink, this variable should be set to - * false. Otherwise, it should be set to true; usually, the function - * core_link_enable_stream sets this field to true. - */ - bool link_state_valid; - bool aux_access_disabled; - bool sync_lt_in_progress; - bool skip_stream_reenable; - bool is_internal_display; - /** @todo Rename. Flag an endpoint as having a programmable mapping to a DIG encoder. */ - bool is_dig_mapping_flexible; - bool hpd_status; /* HPD status of link without physical HPD pin. */ - bool is_hpd_pending; /* Indicates a new received hpd */ - - /* USB4 DPIA links skip verifying link cap, instead performing the fallback method - * for every link training. This is incompatible with DP LL compliance automation, - * which expects the same link settings to be used every retry on a link loss. - * This flag is used to skip the fallback when link loss occurs during automation. - */ - bool skip_fallback_on_link_loss; - - bool edp_sink_present; - - struct dp_trace dp_trace; - - /* caps is the same as reported_link_cap. link_traing use - * reported_link_cap. Will clean up. TODO - */ - struct dc_link_settings reported_link_cap; - struct dc_link_settings verified_link_cap; - struct dc_link_settings cur_link_settings; - struct dc_lane_settings cur_lane_setting[LANE_COUNT_DP_MAX]; - struct dc_link_settings preferred_link_setting; - /* preferred_training_settings are override values that - * come from DM. DM is responsible for the memory - * management of the override pointers. 
- */ - struct dc_link_training_overrides preferred_training_settings; - struct dp_audio_test_data audio_test_data; - - uint8_t ddc_hw_inst; - - uint8_t hpd_src; - - uint8_t link_enc_hw_inst; - /* DIG link encoder ID. Used as index in link encoder resource pool. - * For links with fixed mapping to DIG, this is not changed after dc_link - * object creation. - */ - enum engine_id eng_id; - enum engine_id dpia_preferred_eng_id; - - bool test_pattern_enabled; - /* Pending/Current test pattern are only used to perform and track - * FIXED_VS retimer test pattern/lane adjustment override state. - * Pending allows link HWSS to differentiate PHY vs non-PHY pattern, - * to perform specific lane adjust overrides before setting certain - * PHY test patterns. In cases when lane adjust and set test pattern - * calls are not performed atomically (i.e. performing link training), - * pending_test_pattern will be invalid or contain a non-PHY test pattern - * and current_test_pattern will contain required context for any future - * set pattern/set lane adjust to transition between override state(s). - * */ - enum dp_test_pattern current_test_pattern; - enum dp_test_pattern pending_test_pattern; - - union compliance_test_state compliance_test_state; - - void *priv; - - struct ddc_service *ddc; - - enum dp_panel_mode panel_mode; - bool aux_mode; - - /* Private to DC core */ - - const struct dc *dc; - - struct dc_context *ctx; - - struct panel_cntl *panel_cntl; - struct link_encoder *link_enc; - struct graphics_object_id link_id; - /* Endpoint type distinguishes display endpoints which do not have entries - * in the BIOS connector table from those that do. Helps when tracking link - * encoder to display endpoint assignments. - */ - enum display_endpoint_type ep_type; - union ddi_channel_mapping ddi_channel_mapping; - struct connector_device_tag_info device_tag; - struct dpcd_caps dpcd_caps; - uint32_t dongle_max_pix_clk; - unsigned short chip_caps; - unsigned int dpcd_sink_count; - struct hdcp_caps hdcp_caps; - enum edp_revision edp_revision; - union dpcd_sink_ext_caps dpcd_sink_ext_caps; - - struct psr_settings psr_settings; - struct replay_settings replay_settings; - - /* Drive settings read from integrated info table */ - struct dc_lane_settings bios_forced_drive_settings; - - /* Vendor specific LTTPR workaround variables */ - uint8_t vendor_specific_lttpr_link_rate_wa; - bool apply_vendor_specific_lttpr_link_rate_wa; - - /* MST record stream using this link */ - struct link_flags { - bool dp_keep_receiver_powered; - bool dp_skip_DID2; - bool dp_skip_reset_segment; - bool dp_skip_fs_144hz; - bool dp_mot_reset_segment; - /* Some USB4 docks do not handle turning off MST DSC once it has been enabled. */ - bool dpia_mst_dsc_always_on; - /* Forced DPIA into TBT3 compatibility mode. 
*/ - bool dpia_forced_tbt3_mode; - bool dongle_mode_timing_override; - bool blank_stream_on_ocs_change; - bool read_dpcd204h_on_irq_hpd; - bool force_dp_ffe_preset; - } wa_flags; - union dc_dp_ffe_preset forced_dp_ffe_preset; - struct link_mst_stream_allocation_table mst_stream_alloc_table; - - struct dc_link_status link_status; - struct dprx_states dprx_states; - - struct gpio *hpd_gpio; - enum dc_link_fec_state fec_state; - bool link_powered_externally; // Used to bypass hardware sequencing delays when panel is powered down forcibly - - struct dc_panel_config panel_config; - struct phy_state phy_state; - uint32_t phy_transition_bitmask; - // BW ALLOCATON USB4 ONLY - struct dc_dpia_bw_alloc dpia_bw_alloc_config; - bool skip_implict_edp_power_control; - enum backlight_control_type backlight_control_type; -}; - /* Return an enumerated dc_link. * dc_link order is constant and determined at * boot time. They cannot be created or destroyed. @@ -2589,10 +2598,18 @@ unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context); /* DSC Interfaces */ #include "dc_dsc.h" +void dc_get_visual_confirm_for_stream( + struct dc *dc, + struct dc_stream_state *stream_state, + struct tg_color *color); + /* Disable acc mode Interfaces */ void dc_disable_accelerated_mode(struct dc *dc); bool dc_is_timing_changed(struct dc_stream_state *cur_stream, struct dc_stream_state *new_stream); +bool dc_is_cursor_limit_pending(struct dc *dc); +bool dc_can_clear_cursor_limit(struct dc *dc); + #endif /* DC_INTERFACE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 614e03bfd598..6115b5364394 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -70,20 +70,28 @@ void dc_dmub_srv_destroy(struct dc_dmub_srv **dmub_srv) } } -void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv) +bool dc_dmub_srv_wait_for_pending(struct dc_dmub_srv *dc_dmub_srv) { - struct dmub_srv *dmub = dc_dmub_srv->dmub; - struct dc_context *dc_ctx = dc_dmub_srv->ctx; + struct dmub_srv *dmub; + struct dc_context *dc_ctx; enum dmub_status status; + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + dc_ctx = dc_dmub_srv->ctx; + dmub = dc_dmub_srv->dmub; + do { - status = dmub_srv_wait_for_idle(dmub, 100000); + status = dmub_srv_wait_for_pending(dmub, 100000); } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK); if (status != DMUB_STATUS_OK) { DC_ERROR("Error waiting for DMUB idle: status=%d\n", status); dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); } + + return status == DMUB_STATUS_OK; } void dc_dmub_srv_clear_inbox0_ack(struct dc_dmub_srv *dc_dmub_srv) @@ -126,7 +134,49 @@ void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dc_dmub_srv, } } -bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, +static bool dc_dmub_srv_reg_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, + unsigned int count, + union dmub_rb_cmd *cmd_list) +{ + struct dc_context *dc_ctx; + struct dmub_srv *dmub; + enum dmub_status status = DMUB_STATUS_OK; + int i; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + dc_ctx = dc_dmub_srv->ctx; + dmub = dc_dmub_srv->dmub; + + for (i = 0 ; i < count; i++) { + /* confirm no messages pending */ + do { + status = dmub_srv_wait_for_idle(dmub, 100000); + } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK); + + /* queue command */ + if (status == DMUB_STATUS_OK) + status = 
dmub_srv_reg_cmd_execute(dmub, &cmd_list[i]); + + /* check for errors */ + if (status != DMUB_STATUS_OK) { + break; + } + } + + if (status != DMUB_STATUS_OK) { + if (status != DMUB_STATUS_POWER_STATE_D3) { + DC_ERROR("Error starting DMUB execution: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + } + return false; + } + + return true; +} + +static bool dc_dmub_srv_fb_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list) { @@ -143,20 +193,25 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, for (i = 0 ; i < count; i++) { // Queue command - status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); + if (!cmd_list[i].cmd_common.header.multi_cmd_pending || + dmub_rb_num_free(&dmub->inbox1.rb) >= count - i) { + status = dmub_srv_fb_cmd_queue(dmub, &cmd_list[i]); + } else { + status = DMUB_STATUS_QUEUE_FULL; + } if (status == DMUB_STATUS_QUEUE_FULL) { /* Execute and wait for queue to become empty again. */ - status = dmub_srv_cmd_execute(dmub); + status = dmub_srv_fb_cmd_execute(dmub); if (status == DMUB_STATUS_POWER_STATE_D3) return false; do { - status = dmub_srv_wait_for_idle(dmub, 100000); + status = dmub_srv_wait_for_inbox_free(dmub, 100000, count - i); } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK); /* Requeue the command. */ - status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); + status = dmub_srv_fb_cmd_queue(dmub, &cmd_list[i]); } if (status != DMUB_STATUS_OK) { @@ -168,7 +223,7 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, } } - status = dmub_srv_cmd_execute(dmub); + status = dmub_srv_fb_cmd_execute(dmub); if (status != DMUB_STATUS_OK) { if (status != DMUB_STATUS_POWER_STATE_D3) { DC_ERROR("Error starting DMUB execution: status=%d\n", status); @@ -180,6 +235,23 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, return true; } +bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, + unsigned int count, + union dmub_rb_cmd *cmd_list) +{ + bool res = false; + + if (dc_dmub_srv && dc_dmub_srv->dmub) { + if (dc_dmub_srv->dmub->inbox_type == DMUB_CMD_INTERFACE_REG) { + res = dc_dmub_srv_reg_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list); + } else { + res = dc_dmub_srv_fb_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list); + } + } + + return res; +} + bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, enum dm_dmub_wait_type wait_type, union dmub_rb_cmd *cmd_list) @@ -202,7 +274,8 @@ bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status); if (!dmub->debug.timeout_info.timeout_occured) { dmub->debug.timeout_info.timeout_occured = true; - dmub->debug.timeout_info.timeout_cmd = *cmd_list; + if (cmd_list) + dmub->debug.timeout_info.timeout_cmd = *cmd_list; dmub->debug.timeout_info.timestamp = dm_get_timestamp(dc_dmub_srv->ctx); } dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); @@ -210,8 +283,9 @@ bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, } // Copy data back from ring buffer into command - if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) - dmub_rb_get_return_data(&dmub->inbox1_rb, cmd_list); + if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY && cmd_list) { + dmub_srv_cmd_get_response(dc_dmub_srv->dmub, cmd_list); + } } return true; @@ -224,74 +298,10 @@ bool dc_dmub_srv_cmd_run(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv 
*dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list, enum dm_dmub_wait_type wait_type) { - struct dc_context *dc_ctx; - struct dmub_srv *dmub; - enum dmub_status status; - int i; - - if (!dc_dmub_srv || !dc_dmub_srv->dmub) - return false; - - dc_ctx = dc_dmub_srv->ctx; - dmub = dc_dmub_srv->dmub; - - for (i = 0 ; i < count; i++) { - // Queue command - status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); - - if (status == DMUB_STATUS_QUEUE_FULL) { - /* Execute and wait for queue to become empty again. */ - status = dmub_srv_cmd_execute(dmub); - if (status == DMUB_STATUS_POWER_STATE_D3) - return false; - - status = dmub_srv_wait_for_idle(dmub, 100000); - if (status != DMUB_STATUS_OK) - return false; - - /* Requeue the command. */ - status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); - } - - if (status != DMUB_STATUS_OK) { - if (status != DMUB_STATUS_POWER_STATE_D3) { - DC_ERROR("Error queueing DMUB command: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); - } - return false; - } - } - - status = dmub_srv_cmd_execute(dmub); - if (status != DMUB_STATUS_OK) { - if (status != DMUB_STATUS_POWER_STATE_D3) { - DC_ERROR("Error starting DMUB execution: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); - } + if (!dc_dmub_srv_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list)) return false; - } - - // Wait for DMUB to process command - if (wait_type != DM_DMUB_WAIT_TYPE_NO_WAIT) { - if (dc_dmub_srv->ctx->dc->debug.disable_timeout) { - do { - status = dmub_srv_wait_for_idle(dmub, 100000); - } while (status != DMUB_STATUS_OK); - } else - status = dmub_srv_wait_for_idle(dmub, 100000); - - if (status != DMUB_STATUS_OK) { - DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); - return false; - } - - // Copy data back from ring buffer into command - if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) - dmub_rb_get_return_data(&dmub->inbox1_rb, cmd_list); - } - return true; + return dc_dmub_srv_wait_for_idle(dc_dmub_srv, wait_type, cmd_list); } bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv) @@ -1243,7 +1253,7 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); - dc_dmub_srv_wait_idle(dc->ctx->dmub_srv); + dc_dmub_srv_wait_for_idle(dc->ctx->dmub_srv, DM_DMUB_WAIT_TYPE_WAIT, NULL); memset(&new_signals, 0, sizeof(new_signals)); @@ -1355,14 +1365,15 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) if (!dc->debug.optimize_ips_handshake || !ips_fw->signals.bits.ips2_commit) udelay(dc->debug.ips2_eval_delay_us); - if (ips_fw->signals.bits.ips2_commit) { - DC_LOG_IPS( - "exit IPS2 #1 (ips1_commit=%u ips2_commit=%u)", - ips_fw->signals.bits.ips1_commit, - ips_fw->signals.bits.ips2_commit); + DC_LOG_IPS( + "exit IPS2 #1 (ips1_commit=%u ips2_commit=%u)", + ips_fw->signals.bits.ips1_commit, + ips_fw->signals.bits.ips2_commit); - // Tell PMFW to exit low power state - dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr); + // Tell PMFW to exit low power state + dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr); + + if (ips_fw->signals.bits.ips2_commit) { DC_LOG_IPS( "wait IPS2 entry delay (ips1_commit=%u ips2_commit=%u)", @@ -1400,7 +1411,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); - dmub_srv_sync_inbox1(dc->ctx->dmub_srv->dmub); + dmub_srv_sync_inboxes(dc->ctx->dmub_srv->dmub); } } 
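/*
 * Editorial sketch, not part of the patch: after the refactor above,
 * dc_dmub_srv_cmd_run_list() reduces to "queue + execute" followed by an
 * optional wait, and the register-inbox vs. frame-buffer-inbox choice is
 * hidden behind dc_dmub_srv_cmd_list_queue_execute(). A minimal caller-side
 * illustration of that split, assuming the declarations from dc_dmub_srv.h;
 * the helper name below is hypothetical and only mirrors what the new
 * dc_dmub_srv_cmd_run_list() body does.
 */
static bool example_submit_one_cmd(struct dc_dmub_srv *dc_dmub_srv,
				   union dmub_rb_cmd *cmd)
{
	/* Queue the command and start execution on whichever inbox
	 * (register-based or frame-buffer ring) this DMUB instance uses.
	 */
	if (!dc_dmub_srv_cmd_list_queue_execute(dc_dmub_srv, 1, cmd))
		return false;

	/* Wait for completion; with WAIT_WITH_REPLY the response is copied
	 * back into *cmd via dmub_srv_cmd_get_response().
	 */
	return dc_dmub_srv_wait_for_idle(dc_dmub_srv,
					 DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY,
					 cmd);
}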
@@ -1654,7 +1665,8 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, /* fill in generic command header */ global_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; global_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; - global_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); + global_cmd->header.payload_bytes = + sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); if (enable) { /* send global configuration parameters */ @@ -1673,11 +1685,13 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, /* configure command header */ stream_base_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; stream_base_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; - stream_base_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); + stream_base_cmd->header.payload_bytes = + sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); stream_base_cmd->header.multi_cmd_pending = 1; stream_sub_state_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; stream_sub_state_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; - stream_sub_state_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); + stream_sub_state_cmd->header.payload_bytes = + sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); stream_sub_state_cmd->header.multi_cmd_pending = 1; /* copy stream static base state */ memcpy(&stream_base_cmd->config, @@ -1723,7 +1737,8 @@ void dc_dmub_srv_fams2_drr_update(struct dc *dc, cmd.fams2_drr_update.dmub_optc_state_req.v_total_mid_frame_num = vtotal_mid_frame_num; cmd.fams2_drr_update.dmub_optc_state_req.program_manual_trigger = program_manual_trigger; - cmd.fams2_drr_update.header.payload_bytes = sizeof(cmd.fams2_drr_update) - sizeof(cmd.fams2_drr_update.header); + cmd.fams2_drr_update.header.payload_bytes = + sizeof(cmd.fams2_drr_update) - sizeof(cmd.fams2_drr_update.header); dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } @@ -1759,7 +1774,8 @@ void dc_dmub_srv_fams2_passthrough_flip( /* build command header */ cmds[num_cmds].fams2_flip.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; cmds[num_cmds].fams2_flip.header.sub_type = DMUB_CMD__FAMS2_FLIP; - cmds[num_cmds].fams2_flip.header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2_flip); + cmds[num_cmds].fams2_flip.header.payload_bytes = + sizeof(struct dmub_rb_cmd_fams2_flip) - sizeof(struct dmub_cmd_header); /* for chaining multiple commands, all but last command should set to 1 */ cmds[num_cmds].fams2_flip.header.multi_cmd_pending = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index a636f4c3f01d..ada5c2fb2db3 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -58,7 +58,7 @@ struct dc_dmub_srv { bool needs_idle_wake; }; -void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv); +bool dc_dmub_srv_wait_for_pending(struct dc_dmub_srv *dc_dmub_srv); bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv); diff --git a/drivers/gpu/drm/amd/display/dc/dc_fused_io.c b/drivers/gpu/drm/amd/display/dc/dc_fused_io.c new file mode 100644 index 000000000000..fee69642fb93 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dc_fused_io.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2025 Advanced Micro Devices, Inc. 
+ +#include "dc_fused_io.h" + +#include "dm_helpers.h" +#include "gpio.h" + +static bool op_i2c_convert( + union dmub_rb_cmd *cmd, + const struct mod_hdcp_atomic_op_i2c *op, + enum dmub_cmd_fused_request_type type, + uint32_t ddc_line, + bool over_aux +) +{ + struct dmub_cmd_fused_request *req = &cmd->fused_io.request; + struct dmub_cmd_fused_request_location_i2c *loc = &req->u.i2c; + + if (!op || op->size > sizeof(req->buffer)) + return false; + + req->type = type; + loc->is_aux = false; + loc->ddc_line = ddc_line; + loc->over_aux = over_aux; + loc->address = op->address; + loc->offset = op->offset; + loc->length = op->size; + memcpy(req->buffer, op->data, op->size); + + return true; +} + +static bool op_aux_convert( + union dmub_rb_cmd *cmd, + const struct mod_hdcp_atomic_op_aux *op, + enum dmub_cmd_fused_request_type type, + uint32_t ddc_line +) +{ + struct dmub_cmd_fused_request *req = &cmd->fused_io.request; + struct dmub_cmd_fused_request_location_aux *loc = &req->u.aux; + + if (!op || op->size > sizeof(req->buffer)) + return false; + + req->type = type; + loc->is_aux = true; + loc->ddc_line = ddc_line; + loc->address = op->address; + loc->length = op->size; + memcpy(req->buffer, op->data, op->size); + + return true; +} + +static bool atomic_write_poll_read( + struct dc_link *link, + union dmub_rb_cmd commands[3], + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +) +{ + const uint8_t count = 3; + const uint32_t timeout_per_request_us = 10000; + const uint32_t timeout_per_aux_transaction_us = 10000; + uint64_t timeout_us = 0; + + commands[1].fused_io.request.poll_mask_msb = poll_mask_msb; + commands[1].fused_io.request.timeout_us = poll_timeout_us; + + for (uint8_t i = 0; i < count; i++) { + struct dmub_rb_cmd_fused_io *io = &commands[i].fused_io; + + io->header.type = DMUB_CMD__FUSED_IO; + io->header.sub_type = DMUB_CMD__FUSED_IO_EXECUTE; + io->header.multi_cmd_pending = i != count - 1; + io->header.payload_bytes = sizeof(commands[i].fused_io) - sizeof(io->header); + + timeout_us += timeout_per_request_us + io->request.timeout_us; + if (!io->request.timeout_us && io->request.u.aux.is_aux) + timeout_us += timeout_per_aux_transaction_us * (io->request.u.aux.length / 16); + } + + if (!dm_helpers_execute_fused_io(link->ctx, link, commands, count, timeout_us)) + return false; + + return commands[0].fused_io.request.status == FUSED_REQUEST_STATUS_SUCCESS; +} + +bool dm_atomic_write_poll_read_i2c( + struct dc_link *link, + const struct mod_hdcp_atomic_op_i2c *write, + const struct mod_hdcp_atomic_op_i2c *poll, + struct mod_hdcp_atomic_op_i2c *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +) +{ + if (!link) + return false; + + const bool over_aux = false; + const uint32_t ddc_line = link->ddc->ddc_pin->pin_data->en; + + union dmub_rb_cmd commands[3] = { 0 }; + const bool converted = op_i2c_convert(&commands[0], write, FUSED_REQUEST_WRITE, ddc_line, over_aux) + && op_i2c_convert(&commands[1], poll, FUSED_REQUEST_POLL, ddc_line, over_aux) + && op_i2c_convert(&commands[2], read, FUSED_REQUEST_READ, ddc_line, over_aux); + + if (!converted) + return false; + + const bool result = atomic_write_poll_read(link, commands, poll_timeout_us, poll_mask_msb); + + memcpy(read->data, commands[0].fused_io.request.buffer, read->size); + return result; +} + +bool dm_atomic_write_poll_read_aux( + struct dc_link *link, + const struct mod_hdcp_atomic_op_aux *write, + const struct mod_hdcp_atomic_op_aux *poll, + struct mod_hdcp_atomic_op_aux *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb 
+) +{ + if (!link) + return false; + + const uint32_t ddc_line = link->ddc->ddc_pin->pin_data->en; + union dmub_rb_cmd commands[3] = { 0 }; + const bool converted = op_aux_convert(&commands[0], write, FUSED_REQUEST_WRITE, ddc_line) + && op_aux_convert(&commands[1], poll, FUSED_REQUEST_POLL, ddc_line) + && op_aux_convert(&commands[2], read, FUSED_REQUEST_READ, ddc_line); + + if (!converted) + return false; + + const bool result = atomic_write_poll_read(link, commands, poll_timeout_us, poll_mask_msb); + + memcpy(read->data, commands[0].fused_io.request.buffer, read->size); + return result; +} + diff --git a/drivers/gpu/drm/amd/display/dc/dc_fused_io.h b/drivers/gpu/drm/amd/display/dc/dc_fused_io.h new file mode 100644 index 000000000000..c74917240985 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dc_fused_io.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + */ + +#ifndef __DC_FUSED_IO_H__ +#define __DC_FUSED_IO_H__ + +#include "dc.h" +#include "mod_hdcp.h" + +bool dm_atomic_write_poll_read_i2c( + struct dc_link *link, + const struct mod_hdcp_atomic_op_i2c *write, + const struct mod_hdcp_atomic_op_i2c *poll, + struct mod_hdcp_atomic_op_i2c *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +); + +bool dm_atomic_write_poll_read_aux( + struct dc_link *link, + const struct mod_hdcp_atomic_op_aux *write, + const struct mod_hdcp_atomic_op_aux *poll, + struct mod_hdcp_atomic_op_aux *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +); + +#endif // __DC_FUSED_IO_H__ + diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 8f077e15b4f0..7217de258851 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -682,13 +682,19 @@ void reg_sequence_wait_done(const struct dc_context *ctx) if (offload && ctx->dc->debug.dmub_offload_enabled && !ctx->dc->debug.dmcub_emulation) { - dc_dmub_srv_wait_idle(ctx->dmub_srv); + dc_dmub_srv_wait_for_idle(ctx->dmub_srv, DM_DMUB_WAIT_TYPE_WAIT, NULL); } } char *dce_version_to_string(const int version) { switch (version) { + case DCE_VERSION_6_0: + return "DCE 6.0"; + case DCE_VERSION_6_1: + return "DCE 6.1"; + case DCE_VERSION_6_4: + return "DCE 6.4"; case DCE_VERSION_8_0: return "DCE 8.0"; case DCE_VERSION_8_1: diff --git a/drivers/gpu/drm/amd/display/dc/dc_state_priv.h b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h index 1a12ef579ff4..1d9bae56ff6a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_state_priv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h @@ -105,4 +105,24 @@ bool dc_state_is_fams2_in_use( const struct dc *dc, const struct dc_state *state); + +void dc_state_set_stream_subvp_cursor_limit(const struct dc_stream_state *stream, + struct dc_state *state, + bool limit); + +bool dc_state_get_stream_subvp_cursor_limit(const struct dc_stream_state *stream, + struct dc_state *state); + +void dc_state_set_stream_cursor_subvp_limit(const struct dc_stream_state *stream, + struct dc_state *state, + bool limit); + +bool dc_state_get_stream_cursor_subvp_limit(const struct dc_stream_state *stream, + struct dc_state *state); + +bool dc_state_can_clear_stream_cursor_subvp_limit(const struct dc_stream_state *stream, + struct dc_state *state); + +bool dc_state_is_subvp_in_use(struct dc_state *state); + #endif /* _DC_STATE_PRIV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index e0bfddaa23e3..341d2ffb64b1 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -44,6 +44,8 @@ struct mall_stream_config { */ enum mall_stream_type type; struct dc_stream_state *paired_stream; // master / slave stream + bool subvp_limit_cursor_size; /* stream has/is using subvp limiting hw cursor support */ + bool cursor_size_limit_subvp; /* stream is using hw cursor config preventing subvp */ }; struct dc_stream_status { @@ -503,6 +505,11 @@ void program_cursor_position( struct dc *dc, struct dc_stream_state *stream); +bool dc_stream_check_cursor_attributes( + const struct dc_stream_state *stream, + struct dc_state *state, + const struct dc_cursor_attributes *attributes); + bool dc_stream_set_cursor_attributes( struct dc_stream_state *stream, const struct dc_cursor_attributes *attributes); @@ -579,4 +586,8 @@ void dc_dmub_update_dirty_rect(struct dc *dc, struct dc_stream_state *stream, struct dc_surface_update *srf_updates, struct dc_state *context); + +bool dc_stream_is_cursor_limit_pending(struct dc *dc, struct dc_stream_state *stream); +bool dc_stream_can_clear_cursor_limit(struct dc *dc, struct dc_stream_state *stream); + #endif /* DC_STREAM_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 83ffaae9f439..a4cd0eb39a3a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -210,6 +210,7 @@ struct dc_edid_caps { bool edid_hdmi; bool hdr_supported; + bool rr_capable; struct dc_panel_patch panel_patch; }; @@ -1089,7 +1090,8 @@ union replay_low_refresh_rate_enable_options { struct { //BIT[0-3]: Replay Low Hz Support control unsigned int ENABLE_LOW_RR_SUPPORT :1; - unsigned int RESERVED_1_3 :3; + unsigned int SKIP_ASIC_CHECK :1; + unsigned int RESERVED_2_3 :2; //BIT[4-15]: Replay Low Hz Enable Scenarios unsigned int ENABLE_STATIC_SCREEN :1; unsigned int ENABLE_FULL_SCREEN_VIDEO :1; @@ -1129,6 +1131,10 @@ struct replay_config { union replay_low_refresh_rate_enable_options low_rr_enable_options; /* Replay coasting vtotal is within low refresh rate range. 
*/ bool low_rr_activated; + /* Replay low refresh rate supported*/ + bool low_rr_supported; + /* Replay Video Conferencing Optimization Enabled */ + bool replay_video_conferencing_optimization_enabled; }; /* Replay feature flags*/ @@ -1249,6 +1255,7 @@ enum dc_cm2_gpu_mem_layout { enum dc_cm2_gpu_mem_pixel_component_order { DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_RGBA, + DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_BGRA }; enum dc_cm2_gpu_mem_format { @@ -1270,7 +1277,8 @@ struct dc_cm2_gpu_mem_format_parameters { enum dc_cm2_gpu_mem_size { DC_CM2_GPU_MEM_SIZE_171717, - DC_CM2_GPU_MEM_SIZE_TRANSFORMED + DC_CM2_GPU_MEM_SIZE_333333, + DC_CM2_GPU_MEM_SIZE_TRANSFORMED, }; struct dc_cm2_gpu_mem_parameters { @@ -1279,6 +1287,7 @@ struct dc_cm2_gpu_mem_parameters { struct dc_cm2_gpu_mem_format_parameters format_params; enum dc_cm2_gpu_mem_pixel_component_order component_order; enum dc_cm2_gpu_mem_size size; + uint16_t bit_depth; }; enum dc_cm2_transfer_func_source { @@ -1302,6 +1311,10 @@ struct dc_cm2_func_luts { const struct dc_3dlut *lut3d_func; struct dc_cm2_gpu_mem_parameters gpu_mem_params; }; + bool rmcm_3dlut_shaper_select; + bool mpc_3dlut_enable; + bool rmcm_3dlut_enable; + bool mpc_mcm_post_blend; } lut3d_data; const struct dc_transfer_func *lut1d_func; }; diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index b363f5360818..58c84f555c0f 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -391,6 +391,7 @@ static void dccg35_set_dppclk_rcg(struct dccg *dccg, struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && enable) return; @@ -411,6 +412,8 @@ static void dccg35_set_dppclk_rcg(struct dccg *dccg, BREAK_TO_DEBUGGER(); break; } + //DC_LOG_DEBUG("%s: inst(%d) DPPCLK rcg_disable: %d\n", __func__, inst, enable ? 
0 : 1); + } static void dccg35_set_dpstreamclk_rcg( @@ -1035,6 +1038,7 @@ static void dccg35_enable_dpp_clk_new( DPPCLK0_DTO_MODULO, 0xFF); } + static void dccg35_disable_dpp_clk_new( struct dccg *dccg, int inst) @@ -1112,30 +1116,24 @@ static void dcn35_set_dppclk_enable(struct dccg *dccg, { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + switch (dpp_inst) { case 0: REG_UPDATE(DPPCLK_CTRL, DPPCLK0_EN, enable); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, enable); break; case 1: REG_UPDATE(DPPCLK_CTRL, DPPCLK1_EN, enable); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, enable); break; case 2: REG_UPDATE(DPPCLK_CTRL, DPPCLK2_EN, enable); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, enable); break; case 3: REG_UPDATE(DPPCLK_CTRL, DPPCLK3_EN, enable); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, enable); break; default: break; } + //DC_LOG_DEBUG("%s: dpp_inst(%d) DPPCLK_EN = %d\n", __func__, dpp_inst, enable); } @@ -1163,14 +1161,18 @@ static void dccg35_update_dpp_dto(struct dccg *dccg, int dpp_inst, ASSERT(false); phase = 0xff; } + dccg35_set_dppclk_rcg(dccg, dpp_inst, false); REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, DPPCLK0_DTO_PHASE, phase, DPPCLK0_DTO_MODULO, modulo); dcn35_set_dppclk_enable(dccg, dpp_inst, true); - } else + } else { dcn35_set_dppclk_enable(dccg, dpp_inst, false); + /*we have this in hwss: disable_plane*/ + //dccg35_set_dppclk_rcg(dccg, dpp_inst, true); + } dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk; } @@ -1182,6 +1184,7 @@ static void dccg35_set_dppclk_root_clock_gating(struct dccg *dccg, if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) return; + switch (dpp_inst) { case 0: REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, enable); @@ -1198,6 +1201,8 @@ static void dccg35_set_dppclk_root_clock_gating(struct dccg *dccg, default: break; } + //DC_LOG_DEBUG("%s: dpp_inst(%d) rcg: %d\n", __func__, dpp_inst, enable); + } static void dccg35_get_pixel_rate_div( @@ -1521,28 +1526,30 @@ static void dccg35_set_physymclk_root_clock_gating( switch (phy_inst) { case 0: REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYASYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + PHYASYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 1: REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYBSYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + PHYBSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 2: REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYCSYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + PHYCSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 3: REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYDSYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + PHYDSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 4: REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYESYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + PHYESYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); break; default: BREAK_TO_DEBUGGER(); return; } + //DC_LOG_DEBUG("%s: dpp_inst(%d) PHYESYMCLK_ROOT_GATE_DISABLE:\n", __func__, phy_inst, enable ? 
0 : 1); + } static void dccg35_set_physymclk( @@ -1643,6 +1650,8 @@ static void dccg35_dpp_root_clock_control( return; if (clock_on) { + dccg35_set_dppclk_rcg(dccg, dpp_inst, false); + /* turn off the DTO and leave phase/modulo at max */ dcn35_set_dppclk_enable(dccg, dpp_inst, 1); REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, @@ -1654,6 +1663,8 @@ static void dccg35_dpp_root_clock_control( REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, DPPCLK0_DTO_PHASE, 0, DPPCLK0_DTO_MODULO, 1); + /*we have this in hwss: disable_plane*/ + //dccg35_set_dppclk_rcg(dccg, dpp_inst, true); } dccg->dpp_clock_gated[dpp_inst] = !clock_on; @@ -1771,36 +1782,40 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst) //Disable DTO switch (inst) { case 0: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, 1); + REG_UPDATE_2(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_PHASE, 0, DSCCLK0_DTO_MODULO, 0); REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 1); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, 1); break; case 1: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, 1); + REG_UPDATE_2(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_PHASE, 0, DSCCLK1_DTO_MODULO, 0); REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 1); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, 1); break; case 2: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, 1); + REG_UPDATE_2(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_PHASE, 0, DSCCLK2_DTO_MODULO, 0); REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 1); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, 1); break; case 3: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, 1); + REG_UPDATE_2(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_PHASE, 0, DSCCLK3_DTO_MODULO, 0); REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, 1); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, 1); break; default: BREAK_TO_DEBUGGER(); @@ -1813,9 +1828,6 @@ static void dccg35_disable_dscclk(struct dccg *dccg, { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - return; - switch (inst) { case 0: REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 0); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index 077337698e0a..b4f5b4a6331a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -976,11 +976,12 @@ static bool dcn31_program_pix_clk( struct bp_pixel_clock_parameters bp_pc_params = {0}; enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24; - // Apply ssed(spread spectrum) dpref clock for edp only. 
- if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0 - && pix_clk_params->signal_type == SIGNAL_TYPE_EDP - && encoding == DP_8b_10b_ENCODING) + // Apply ssed(spread spectrum) dpref clock for edp and dp + if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0 && + dc_is_dp_signal(pix_clk_params->signal_type) && + encoding == DP_8b_10b_ENCODING) dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz; + // For these signal types Driver to program DP_DTO without calling VBIOS Command table if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) { if (e) { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h index 0721ae895ae9..94128f7a18b1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h @@ -257,7 +257,7 @@ bool dce110_clk_src_construct( struct dce110_clk_src *clk_src, struct dc_context *ctx, struct dc_bios *bios, - enum clock_source_id, + enum clock_source_id id, const struct dce110_clk_src_regs *regs, const struct dce110_clk_src_shift *cs_shift, const struct dce110_clk_src_mask *cs_mask); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c index ccc154b0281c..3b9011ef9b68 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c @@ -28,6 +28,8 @@ #include "dc.h" #include "core_types.h" #include "dmub_cmd.h" +#include "dc_dmub_srv.h" +#include "dmub/dmub_srv.h" #define TO_DMUB_ABM(abm)\ container_of(abm, struct dce_abm, base) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c index 0d7e7f3b81a1..a641ae04450c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c @@ -240,7 +240,8 @@ bool dmub_abm_save_restore( cmd.abm_save_restore.abm_init_config_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1; cmd.abm_save_restore.abm_init_config_data.panel_mask = panel_mask; - cmd.abm_save_restore.header.payload_bytes = sizeof(struct dmub_rb_cmd_abm_save_restore); + cmd.abm_save_restore.header.payload_bytes = + sizeof(struct dmub_rb_cmd_abm_save_restore) - sizeof(struct dmub_cmd_header); dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index c31e4f26a305..fcd3d86ad517 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -280,7 +280,9 @@ static void dmub_replay_set_power_opt_and_coasting_vtotal(struct dmub_replay *dm memset(&cmd, 0, sizeof(cmd)); pCmd->header.type = DMUB_CMD__REPLAY; pCmd->header.sub_type = DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL; - pCmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal); + pCmd->header.payload_bytes = + sizeof(struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal) - + sizeof(struct dmub_cmd_header); pCmd->replay_set_power_opt_data.power_opt = power_opt; pCmd->replay_set_power_opt_data.panel_inst = panel_inst; pCmd->replay_set_coasting_vtotal_data.coasting_vtotal = (coasting_vtotal & 0xFFFF); @@ -319,7 +321,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_set_timing_sync.header.sub_type = DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED; 
cmd.replay_set_timing_sync.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_timing_sync); + sizeof(struct dmub_rb_cmd_replay_set_timing_sync) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_set_timing_sync.replay_set_timing_sync_data.panel_inst = cmd_element->sync_data.panel_inst; @@ -331,7 +334,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_set_frameupdate_timer.header.sub_type = DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER; cmd.replay_set_frameupdate_timer.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_frameupdate_timer); + sizeof(struct dmub_rb_cmd_replay_set_frameupdate_timer) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_set_frameupdate_timer.data.panel_inst = cmd_element->panel_inst; @@ -345,7 +349,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_set_pseudo_vtotal.header.sub_type = DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL; cmd.replay_set_pseudo_vtotal.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_pseudo_vtotal); + sizeof(struct dmub_rb_cmd_replay_set_pseudo_vtotal) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_set_pseudo_vtotal.data.panel_inst = cmd_element->pseudo_vtotal_data.panel_inst; @@ -357,7 +362,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_disabled_adaptive_sync_sdp.header.sub_type = DMUB_CMD__REPLAY_DISABLED_ADAPTIVE_SYNC_SDP; cmd.replay_disabled_adaptive_sync_sdp.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_disabled_adaptive_sync_sdp); + sizeof(struct dmub_rb_cmd_replay_disabled_adaptive_sync_sdp) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_disabled_adaptive_sync_sdp.data.panel_inst = cmd_element->disabled_adaptive_sync_sdp_data.panel_inst; @@ -369,7 +375,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_set_general_cmd.header.sub_type = DMUB_CMD__REPLAY_SET_GENERAL_CMD; cmd.replay_set_general_cmd.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_general_cmd); + sizeof(struct dmub_rb_cmd_replay_set_general_cmd) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_set_general_cmd.data.panel_inst = cmd_element->set_general_cmd_data.panel_inst; diff --git a/drivers/gpu/drm/amd/display/dc/dce60/Makefile b/drivers/gpu/drm/amd/display/dc/dce60/Makefile index eede83ad91fa..824f73eb3326 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce60/Makefile @@ -25,8 +25,7 @@ CFLAGS_$(AMDDALPATH)/dc/dce60/dce60_resource.o = -Wno-override-init -DCE60 = dce60_timing_generator.o dce60_hw_sequencer.o \ - dce60_resource.o +DCE60 = dce60_timing_generator.o AMD_DAL_DCE60 = $(addprefix $(AMDDALPATH)/dc/dce60/,$(DCE60)) diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c index 003a9330c286..88e7a1fc9a30 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c @@ -105,7 +105,7 @@ static void program_pix_dur(struct timing_generator *tg, uint32_t pix_clk_100hz) dm_write_reg(tg->ctx, addr, value); } -static void program_timing(struct timing_generator *tg, +static void dce80_timing_generator_program_timing(struct timing_generator *tg, const struct dc_crtc_timing *timing, int vready_offset, int vstartup_start, @@ -185,7 +185,7 @@ static void dce80_timing_generator_enable_advanced_request( static const struct timing_generator_funcs dce80_tg_funcs = { 
.validate_timing = dce110_tg_validate_timing, - .program_timing = program_timing, + .program_timing = dce80_timing_generator_program_timing, .enable_crtc = dce110_timing_generator_enable_crtc, .disable_crtc = dce110_timing_generator_disable_crtc, .is_counter_moving = dce110_timing_generator_is_counter_moving, diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index 5efddd48d5c5..9d160b39e8c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -153,6 +153,14 @@ bool dm_helpers_submit_i2c( const struct dc_link *link, struct i2c_command *cmd); +bool dm_helpers_execute_fused_io( + struct dc_context *ctx, + struct dc_link *link, + union dmub_rb_cmd *commands, + uint8_t count, + uint32_t timeout_us +); + bool dm_helpers_dp_write_dsc_enable( struct dc_context *ctx, const struct dc_stream_state *stream, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index f1fe49401bc0..8d24763938ea 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -1002,6 +1002,7 @@ static bool CalculatePrefetchSchedule( dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); + dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH Lsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC); Tsw_oto = Lsw_oto * LineTime; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index f567a9023682..ed59c77bc6f6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -1105,6 +1105,7 @@ static bool CalculatePrefetchSchedule( Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); + dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; Tpre_rounded = dst_y_prefetch_equ * LineTime; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c index 5865e8fa2d8e..9f3938a50240 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c @@ -1123,6 +1123,7 @@ static bool CalculatePrefetchSchedule( Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); + dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; Tpre_rounded = dst_y_prefetch_equ * LineTime; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 
56dda686e299..b0fc1fd20208 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -627,6 +627,7 @@ static bool dcn32_assign_subvp_pipe(struct dc *dc, */ if (pipe->plane_state && !pipe->top_pipe && !pipe->prev_odm_pipe && !dcn32_is_center_timing(pipe) && !pipe->stream->hw_cursor_req && + !dc_state_get_stream_cursor_subvp_limit(pipe->stream, context) && !(pipe->stream->timing.pix_clk_100hz / 10000 > DCN3_2_MAX_SUBVP_PIXEL_RATE_MHZ) && (!dcn32_is_psr_capable(pipe) || (context->stream_count == 1 && dc->caps.dmub_caps.subvp_psr)) && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_NONE && diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index 21fd466dba26..157ecf008d6c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -99,7 +99,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math. CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml21_wrapper.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/dml21_translation_helper.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/dml21_utils.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/inc/dml2_debug.o := $(dml2_ccflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_rcflags) @@ -117,11 +116,9 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_standalone_libraries/lib_floa CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml21_wrapper.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/dml21_translation_helper.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/dml21_utils.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/inc/dml2_debug.o := $(dml2_rcflags) DML21 := src/dml2_top/dml2_top_interfaces.o DML21 += src/dml2_top/dml2_top_soc15.o -DML21 += src/inc/dml2_debug.o DML21 += src/dml2_core/dml2_core_dcn4.o DML21 += src/dml2_core/dml2_core_factory.o DML21 += src/dml2_core/dml2_core_dcn4_calcs.o diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 0c8ec30ea672..ed75319a07d5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -526,7 +526,8 @@ static void populate_dml21_output_config_from_stream_state(struct dml2_link_outp static void populate_dml21_stream_overrides_from_stream_state( struct dml2_stream_parameters *stream_desc, - struct dc_stream_state *stream) + struct dc_stream_state *stream, + struct dc_stream_status *stream_status) { switch (stream->debug.force_odm_combine_segments) { case 0: @@ -551,7 +552,9 @@ static void populate_dml21_stream_overrides_from_stream_state( if (!stream->ctx->dc->debug.enable_single_display_2to1_odm_policy || stream->debug.force_odm_combine_segments > 0) stream_desc->overrides.disable_dynamic_odm = true; - stream_desc->overrides.disable_subvp = stream->ctx->dc->debug.force_disable_subvp || stream->hw_cursor_req; + stream_desc->overrides.disable_subvp = stream->ctx->dc->debug.force_disable_subvp || + stream->hw_cursor_req || + stream_status->mall_stream_config.cursor_size_limit_subvp; } static enum dml2_swizzle_mode gfx_addr3_to_dml2_swizzle_mode(enum swizzle_mode_addr3_values addr3_mode) @@ -885,6 +888,9 @@ static 
void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm case DC_CM2_GPU_MEM_SIZE_171717: plane->tdlut.tdlut_width_mode = dml2_tdlut_width_17_cube; break; + case DC_CM2_GPU_MEM_SIZE_333333: + plane->tdlut.tdlut_width_mode = dml2_tdlut_width_33_cube; + break; case DC_CM2_GPU_MEM_SIZE_TRANSFORMED: //plane->tdlut.tdlut_width_mode = dml2_tdlut_width_flatten; // dml2_tdlut_width_flatten undefined break; @@ -1021,7 +1027,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index], dml_ctx); adjust_dml21_hblank_timing_config_from_pipe_ctx(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, &context->res_ctx.pipe_ctx[stream_index]); populate_dml21_output_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].output, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index]); - populate_dml21_stream_overrides_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location], context->streams[stream_index]); + populate_dml21_stream_overrides_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location], context->streams[stream_index], &context->stream_status[stream_index]); dml_dispcfg->stream_descriptors[disp_cfg_stream_location].overrides.hw.twait_budgeting.fclk_pstate = dml2_twait_budgeting_setting_if_needed; dml_dispcfg->stream_descriptors[disp_cfg_stream_location].overrides.hw.twait_budgeting.uclk_pstate = dml2_twait_budgeting_setting_if_needed; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index 5d16f36ec95c..ed6584535e89 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -234,7 +234,9 @@ static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_s if (!result) return false; + DC_FP_START(); result = dml2_build_mode_programming(mode_programming); + DC_FP_END(); if (!result) return false; @@ -277,7 +279,9 @@ static bool dml21_check_mode_support(const struct dc *in_dc, struct dc_state *co mode_support->dml2_instance = dml_init->dml2_instance; dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx); dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params.programming = dml_ctx->v21.mode_programming.programming; + DC_FP_START(); is_supported = dml2_check_mode_supported(mode_support); + DC_FP_END(); if (!is_supported) return false; @@ -288,16 +292,12 @@ bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml { bool out = false; - DC_FP_START(); - /* Use dml_validate_only for fast_validate path */ if (fast_validate) out = dml21_check_mode_support(in_dc, context, dml_ctx); else out = dml21_mode_check_and_programming(in_dc, context, dml_ctx); - DC_FP_END(); - return out; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h index a64ec4dcf11a..c047d56527c4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h @@ -43,4 +43,5 @@ bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_o */ bool dml2_build_mcache_programming(struct dml2_build_mcache_programming_in_out *in_out); + #endif diff 
--git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h index 25b607e7b726..84c90050668c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h @@ -156,6 +156,8 @@ struct dml2_dchub_watermark_regs { uint32_t urgent; uint32_t sr_enter; uint32_t sr_exit; + uint32_t sr_enter_z8; + uint32_t sr_exit_z8; uint32_t uclk_pstate; uint32_t fclk_pstate; uint32_t temp_read_or_ppt; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h index 5e1ab6d97640..255f05de362c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h @@ -166,7 +166,7 @@ struct dml2_surface_cfg { enum dml2_swizzle_mode tiling; struct { - unsigned long pitch; + unsigned long pitch; // In elements, two pixels per element in 422 packed format unsigned long width; unsigned long height; } plane0; @@ -385,6 +385,7 @@ struct dml2_plane_parameters { long reserved_vblank_time_ns; unsigned int max_vactive_det_fill_delay_us; // 0 = no reserved time, +ve = explicit max delay unsigned int gpuvm_min_page_size_kbytes; + unsigned int hostvm_min_page_size_kbytes; enum dml2_svp_mode_override legacy_svp_config; //TODO remove in favor of svp_config diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index bb863c8c6b39..6ee37386f672 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -456,10 +456,10 @@ bool core_dcn4_mode_support(struct dml2_core_mode_support_in_out *in_out) in_out->mode_support_result.global.active.urgent_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] * 1000), 1.0); in_out->mode_support_result.global.svp_prefetch.average_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] * 1000), 1.0); in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] * 1000), 1.0); - dml2_printf("DML::%s: in_out->mode_support_result.global.active.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_sdp_kbps); - dml2_printf("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps); - dml2_printf("DML::%s: in_out->mode_support_result.global.active.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_dram_kbps); - dml2_printf("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps); + DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.active.urgent_bw_sdp_kbps = %ld\n", __func__, 
in_out->mode_support_result.global.active.urgent_bw_sdp_kbps); + DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps); + DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.active.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_dram_kbps); + DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps); for (i = 0; i < l->svp_expanded_display_cfg.num_planes; i++) { in_out->mode_support_result.per_plane[i].dppclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDPPCLK[i] * 1000); @@ -509,7 +509,7 @@ bool core_dcn4_mode_support(struct dml2_core_mode_support_in_out *in_out) stream_index = l->svp_expanded_display_cfg.plane_descriptors[i].stream_index; in_out->mode_support_result.per_stream[stream_index].dscclk_khz = (unsigned int)core->clean_me_up.mode_lib.ms.required_dscclk_freq_mhz[i] * 1000; - dml2_printf("CORE_DCN4::%s: i=%d stream_index=%d, in_out->mode_support_result.per_stream[stream_index].dscclk_khz = %u\n", __func__, i, stream_index, in_out->mode_support_result.per_stream[stream_index].dscclk_khz); + DML_LOG_VERBOSE("CORE_DCN4::%s: i=%d stream_index=%d, in_out->mode_support_result.per_stream[stream_index].dscclk_khz = %u\n", __func__, i, stream_index, in_out->mode_support_result.per_stream[stream_index].dscclk_khz); if (!((stream_bitmask >> stream_index) & 0x1)) { in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].odms_used = odm_count; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 4c504cb0e1c5..c4dad7164d31 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -54,104 +54,104 @@ static double dml2_core_div_rem(double dividend, unsigned int divisor, unsigned static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) { - dml2_printf("DML: ===================================== \n"); - dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n"); + DML_LOG_VERBOSE("DML: ===================================== \n"); + DML_LOG_VERBOSE("DML: DML_MODE_SUPPORT_INFO_ST\n"); if (!fail_only || support->ScaleRatioAndTapsSupport == 0) - dml2_printf("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); + DML_LOG_VERBOSE("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) - dml2_printf("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); + DML_LOG_VERBOSE("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); if (!fail_only || support->ViewportSizeSupport == 0) - dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); + DML_LOG_VERBOSE("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) - dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); + DML_LOG_VERBOSE("DML: support: 
LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) - dml2_printf("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); + DML_LOG_VERBOSE("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); if (!fail_only || support->BPPForMultistreamNotIndicated == 1) - dml2_printf("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); + DML_LOG_VERBOSE("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); if (!fail_only || support->MultistreamWithHDMIOreDP == 1) - dml2_printf("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + DML_LOG_VERBOSE("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); if (!fail_only || support->ExceededMultistreamSlots == 1) - dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + DML_LOG_VERBOSE("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) - dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); + DML_LOG_VERBOSE("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); if (!fail_only || support->NotEnoughLanesForMSO == 1) - dml2_printf("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); + DML_LOG_VERBOSE("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); if (!fail_only || support->P2IWith420 == 1) - dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); + DML_LOG_VERBOSE("DML: support: P2IWith420 = %d\n", support->P2IWith420); if (!fail_only || support->DSC422NativeNotSupported == 1) - dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); + DML_LOG_VERBOSE("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); if (!fail_only || support->DSCSlicesODMModeSupported == 0) - dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + DML_LOG_VERBOSE("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); if (!fail_only || support->NotEnoughDSCUnits == 1) - dml2_printf("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); + DML_LOG_VERBOSE("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); if (!fail_only || support->NotEnoughDSCSlices == 1) - dml2_printf("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); + DML_LOG_VERBOSE("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) - dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + DML_LOG_VERBOSE("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); + 
DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) - dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + DML_LOG_VERBOSE("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) - dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + DML_LOG_VERBOSE("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) - dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); + DML_LOG_VERBOSE("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); + DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); if (!fail_only || support->ROBSupport == 0) - dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport); + DML_LOG_VERBOSE("DML: support: ROBSupport = %d\n", support->ROBSupport); if (!fail_only || support->OutstandingRequestsSupport == 0) - dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); + DML_LOG_VERBOSE("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) - dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); + DML_LOG_VERBOSE("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) - dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); + DML_LOG_VERBOSE("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); if (!fail_only || support->TotalAvailablePipesSupport == 0) - dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + DML_LOG_VERBOSE("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); if (!fail_only || support->NumberOfOTGSupport == 0) - dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + DML_LOG_VERBOSE("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); if (!fail_only || support->NumberOfHDMIFRLSupport == 0) - dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); + DML_LOG_VERBOSE("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); if (!fail_only || support->NumberOfDP2p0Support == 0) - dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); + DML_LOG_VERBOSE("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); if (!fail_only || support->EnoughWritebackUnits == 0) - dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + DML_LOG_VERBOSE("DML: support: 
EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) - dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + DML_LOG_VERBOSE("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); if (!fail_only || support->WritebackLatencySupport == 0) - dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + DML_LOG_VERBOSE("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); if (!fail_only || support->CursorSupport == 0) - dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); + DML_LOG_VERBOSE("DML: support: CursorSupport = %d\n", support->CursorSupport); if (!fail_only || support->PitchSupport == 0) - dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); + DML_LOG_VERBOSE("DML: support: PitchSupport = %d\n", support->PitchSupport); if (!fail_only || support->ViewportExceedsSurface == 1) - dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); + DML_LOG_VERBOSE("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); if (!fail_only || support->PrefetchSupported == 0) - dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + DML_LOG_VERBOSE("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) - dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + DML_LOG_VERBOSE("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); if (!fail_only || support->AvgBandwidthSupport == 0) - dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + DML_LOG_VERBOSE("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); if (!fail_only || support->DynamicMetadataSupported == 0) - dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + DML_LOG_VERBOSE("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); if (!fail_only || support->VRatioInPrefetchSupported == 0) - dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + DML_LOG_VERBOSE("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); if (!fail_only || support->PTEBufferSizeNotExceeded == 0) - dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + DML_LOG_VERBOSE("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0) - dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); + DML_LOG_VERBOSE("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); if (!fail_only || support->ExceededMALLSize == 1) - dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + DML_LOG_VERBOSE("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); if (!fail_only || support->g6_temp_read_support == 0) - dml2_printf("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + DML_LOG_VERBOSE("DML: support: g6_temp_read_support = %d\n", 
support->g6_temp_read_support); if (!fail_only || support->ImmediateFlipSupport == 0) - dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); + DML_LOG_VERBOSE("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); if (!fail_only || support->LinkCapacitySupport == 0) - dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + DML_LOG_VERBOSE("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); if (!fail_only || support->ModeSupport == 0) - dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport); - dml2_printf("DML: ===================================== \n"); + DML_LOG_VERBOSE("DML: support: ModeSupport = %d\n", support->ModeSupport); + DML_LOG_VERBOSE("DML: ===================================== \n"); } static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg) @@ -179,11 +179,9 @@ static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg } else { out_bpp[k] = 0; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); - dml2_printf("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); - dml2_printf("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); -#endif + DML_LOG_VERBOSE("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); + DML_LOG_VERBOSE("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); + DML_LOG_VERBOSE("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); } } @@ -212,9 +210,7 @@ static unsigned int dml_get_num_active_pipes(int unsigned num_planes, const stru num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); -#endif + DML_LOG_VERBOSE("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); return num_active_pipes; } @@ -251,7 +247,7 @@ static bool dml_get_is_phantom_pipe(const struct dml2_display_cfg *display_cfg, unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_idx]); - dml2_printf("DML::%s: pipe_idx=%d legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, display_cfg->plane_descriptors[plane_idx].overrides.legacy_svp_config, is_phantom); + DML_LOG_VERBOSE("DML::%s: pipe_idx=%d legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, display_cfg->plane_descriptors[plane_idx].overrides.legacy_svp_config, is_phantom); return is_phantom; } @@ -415,19 +411,17 @@ static void CalculateMaxDETAndMinCompressedBufferSize( *nomDETInKByte = (unsigned int)(math_floor2((double)*MaxTotalDETInKByte / (double)MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte)); *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; -#if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present); - dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); - dml2_printf("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte); - dml2_printf("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP); - dml2_printf("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte); - dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, 
*nomDETInKByte); - dml2_printf("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte); -#endif + DML_LOG_VERBOSE("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present); + DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP); + DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte); + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte); if (nomDETInKByteOverrideEnable) { *nomDETInKByte = nomDETInKByteOverrideValue; - dml2_printf("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte); } } @@ -502,7 +496,7 @@ static bool dml_is_420(enum dml2_source_format_class source_format) val = 0; break; default: - DML2_ASSERT(0); + DML_ASSERT(0); break; } return val; @@ -535,7 +529,7 @@ static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode else if (sw_mode == dml2_gfx11_sw_256kb_r_x) return 262144; else { - DML2_ASSERT(0); + DML_ASSERT(0); return 256; } } @@ -570,8 +564,8 @@ static int unsigned dml_get_gfx_version(enum dml2_swizzle_mode sw_mode) sw_mode == dml2_gfx11_sw_256kb_r_x) { version = 11; } else { - dml2_printf("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); - DML2_ASSERT(0); + DML_LOG_VERBOSE("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); + DML_ASSERT(0); } return version; @@ -645,21 +639,19 @@ static void CalculateBytePerPixelAndBlockSizes( *BytePerPixelY = 2; *BytePerPixelC = 4; } else { - dml2_printf("ERROR: DML::%s: SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat); - DML2_ASSERT(0); + DML_LOG_VERBOSE("ERROR: DML::%s: SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat); + DML_ASSERT(0); } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat); - dml2_printf("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); - dml2_printf("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); - dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY); - dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC); - dml2_printf("DML::%s: pitch_y = %u\n", __func__, pitch_y); - dml2_printf("DML::%s: pitch_c = %u\n", __func__, pitch_c); - dml2_printf("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l); - dml2_printf("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c); -#endif + DML_LOG_VERBOSE("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat); + DML_LOG_VERBOSE("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); + DML_LOG_VERBOSE("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); + DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY); + DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC); + DML_LOG_VERBOSE("DML::%s: pitch_y = %u\n", __func__, pitch_y); + DML_LOG_VERBOSE("DML::%s: pitch_c = %u\n", __func__, pitch_c); + DML_LOG_VERBOSE("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l); + DML_LOG_VERBOSE("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c); if 
(dml_get_gfx_version(SurfaceTiling) == 11) { *surf_linear128_l = 0; @@ -703,12 +695,10 @@ static void CalculateBytePerPixelAndBlockSizes( *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY); - dml2_printf("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY); - dml2_printf("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC); - dml2_printf("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC); -#endif + DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY); + DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY); + DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC); + DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC); if (dml_get_gfx_version(SurfaceTiling) == 11) { if (SurfaceTiling == dml2_gfx11_sw_linear) { @@ -752,8 +742,8 @@ static void CalculateBytePerPixelAndBlockSizes( } else if (SurfaceTiling == dml2_sw_256kb_2d) { macro_tile_scale = 32; } else { - dml2_printf("ERROR: Invalid SurfaceTiling setting! val=%u\n", SurfaceTiling); - DML2_ASSERT(0); + DML_LOG_VERBOSE("ERROR: Invalid SurfaceTiling setting! val=%u\n", SurfaceTiling); + DML_ASSERT(0); } *MacroTileHeightY = macro_tile_scale * *BlockHeight256BytesY; @@ -766,12 +756,10 @@ static void CalculateBytePerPixelAndBlockSizes( } } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY); - dml2_printf("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY); - dml2_printf("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC); - dml2_printf("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC); -#endif + DML_LOG_VERBOSE("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY); + DML_LOG_VERBOSE("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY); + DML_LOG_VERBOSE("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC); + DML_LOG_VERBOSE("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC); } static void CalculateSinglePipeDPPCLKAndSCLThroughput( @@ -860,10 +848,8 @@ static void CalculateSwathWidth( unsigned int surface_width_ub_c; unsigned int surface_height_ub_c; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); - dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); -#endif + DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); + DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { @@ -872,11 +858,9 @@ static void CalculateSwathWidth( SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.height; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u ViewportWidth=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); - dml2_printf("DML::%s: k=%u ViewportHeight=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); - dml2_printf("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); -#endif + 
DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); MainSurfaceODMMode = ODMMode[k]; @@ -899,13 +883,11 @@ static void CalculateSwathWidth( } } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u HActive=%u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active); - dml2_printf("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); - dml2_printf("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode); - dml2_printf("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]); - dml2_printf("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u HActive=%lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active); + DML_LOG_VERBOSE("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]); if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) { SwathWidthC[k] = SwathWidthY[k] / 2; @@ -934,22 +916,20 @@ static void CalculateSwathWidth( surface_width_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.width, req_width_horz_c); surface_height_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.height, Read256BytesBlockHeightC[k]); -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l); - dml2_printf("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l); - dml2_printf("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c); - dml2_printf("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c); - dml2_printf("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); - dml2_printf("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); - dml2_printf("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]); - dml2_printf("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]); - dml2_printf("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]); - dml2_printf("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]); - dml2_printf("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); - dml2_printf("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); - dml2_printf("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary); - dml2_printf("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l); + DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_l=%u\n", 
__func__, k, surface_height_ub_l); + DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c); + DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c); + DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); + DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); + DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary); + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); req_per_swath_ub_l[k] = 0; req_per_swath_ub_c[k] = 0; @@ -995,15 +975,12 @@ static void CalculateSwathWidth( } } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]); - dml2_printf("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]); - dml2_printf("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]); - dml2_printf("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]); - dml2_printf("DML::%s: k=%u req_per_swath_ub_l=%u\n", __func__, k, req_per_swath_ub_l[k]); - dml2_printf("DML::%s: k=%u req_per_swath_ub_c=%u\n", __func__, k, req_per_swath_ub_c[k]); -#endif - + DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_l=%u\n", __func__, k, req_per_swath_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_c=%u\n", __func__, k, req_per_swath_ub_c[k]); } } @@ -1018,13 +995,11 @@ static bool UnboundedRequest(bool unb_req_force_en, bool unb_req_force_val, unsi if (unb_req_force_en) { unb_req_en = unb_req_force_val && unb_req_ok; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en); - dml2_printf("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val); - dml2_printf("DML::%s: unb_req_ok = %u\n", __func__, unb_req_ok); - dml2_printf("DML::%s: unb_req_en = %u\n", __func__, unb_req_en); -#endif - return (unb_req_en); + DML_LOG_VERBOSE("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en); + DML_LOG_VERBOSE("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val); + DML_LOG_VERBOSE("DML::%s: unb_req_ok = %u\n", __func__, unb_req_ok); + DML_LOG_VERBOSE("DML::%s: unb_req_en = %u\n", __func__, unb_req_en); + return unb_req_en; } static void CalculateDETBufferSize( @@ -1054,16 +1029,14 @@ static void CalculateDETBufferSize( bool NextPotentialSurfaceToAssignDETPieceFound; bool 
MinimizeReallocationSuccess = false; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); - dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); - dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); - dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled); - dml2_printf("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte); - dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); - dml2_printf("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte); - dml2_printf("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, CompressedBufferSegmentSizeInkByte); -#endif + DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); + DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte); + DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, CompressedBufferSegmentSizeInkByte); // Note: Will use default det size if that fits 2 swaths if (UnboundedRequestEnabled) { @@ -1092,19 +1065,15 @@ static void CalculateDETBufferSize( l->minDET = l->minDET + ConfigReturnBufferSegmentSizeInkByte; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u minDET = %u\n", __func__, k, l->minDET); - dml2_printf("DML::%s: k=%u max_minDET = %u\n", __func__, k, l->max_minDET); - dml2_printf("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, l->minDET_pipe); - dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, full_swath_bytes_l[k]); - dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, full_swath_bytes_c[k]); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u minDET = %u\n", __func__, k, l->minDET); + DML_LOG_VERBOSE("DML::%s: k=%u max_minDET = %u\n", __func__, k, l->max_minDET); + DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, l->minDET_pipe); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, full_swath_bytes_c[k]); if (l->minDET_pipe == 0) { l->minDET_pipe = (unsigned int)(math_max2(128, math_ceil2(((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte))); -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, l->minDET_pipe); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, l->minDET_pipe); } if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { @@ -1117,12 +1086,10 @@ static void CalculateDETBufferSize( l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 
1 : DPPPerSurface[k]) * l->minDET_pipe; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]); - dml2_printf("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.det_size_override_kb); - dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); - dml2_printf("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, l->DETBufferSizePoolInKByte); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.det_size_override_kb); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); + DML_LOG_VERBOSE("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, l->DETBufferSizePoolInKByte); } if (display_cfg->minimize_det_reallocation) { @@ -1194,14 +1161,12 @@ static void CalculateDETBufferSize( l->TotalBandwidth = l->TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; } } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: --- Before bandwidth adjustment ---\n", __func__); + DML_LOG_VERBOSE("DML::%s: --- Before bandwidth adjustment ---\n", __func__); for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { - dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); } - dml2_printf("DML::%s: --- DET allocation with bandwidth ---\n", __func__); -#endif - dml2_printf("DML::%s: TotalBandwidth = %f\n", __func__, l->TotalBandwidth); + DML_LOG_VERBOSE("DML::%s: --- DET allocation with bandwidth ---\n", __func__); + DML_LOG_VERBOSE("DML::%s: TotalBandwidth = %f\n", __func__, l->TotalBandwidth); l->BandwidthOfSurfacesNotAssignedDETPiece = l->TotalBandwidth; for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { @@ -1213,10 +1178,8 @@ static void CalculateDETBufferSize( } else { DETPieceAssignedToThisSurfaceAlready[k] = false; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]); - dml2_printf("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, l->BandwidthOfSurfacesNotAssignedDETPiece); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]); + DML_LOG_VERBOSE("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, l->BandwidthOfSurfacesNotAssignedDETPiece); } for (unsigned int j = 0; j < NumberOfActiveSurfaces; ++j) { @@ -1224,22 +1187,18 @@ static void CalculateDETBufferSize( l->NextSurfaceToAssignDETPiece = 0; for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]); - dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]); - dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]); - dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); - dml2_printf("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, l->NextSurfaceToAssignDETPiece); 
-#endif + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, l->NextSurfaceToAssignDETPiece); if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound || ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece])) { l->NextSurfaceToAssignDETPiece = k; NextPotentialSurfaceToAssignDETPieceFound = true; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); - dml2_printf("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); -#endif + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); } if (NextPotentialSurfaceToAssignDETPieceFound) { @@ -1249,20 +1208,16 @@ static void CalculateDETBufferSize( * (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte, math_floor2((double)l->DETBufferSizePoolInKByte, (ForceSingleDPP ? 
1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte))); -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, l->DETBufferSizePoolInKByte); - dml2_printf("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, l->NextSurfaceToAssignDETPiece); - dml2_printf("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]); - dml2_printf("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); - dml2_printf("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, l->BandwidthOfSurfacesNotAssignedDETPiece); - dml2_printf("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, l->NextDETBufferPieceInKByte); - dml2_printf("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, l->NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]); -#endif + DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, l->DETBufferSizePoolInKByte); + DML_LOG_VERBOSE("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, l->NextSurfaceToAssignDETPiece); + DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, l->BandwidthOfSurfacesNotAssignedDETPiece); + DML_LOG_VERBOSE("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, l->NextDETBufferPieceInKByte); + DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, l->NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]); DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] + l->NextDETBufferPieceInKByte / (ForceSingleDPP ? 
1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]); -#ifdef __DML_VBA_DEBUG__ - dml2_printf("to %u\n", DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]); -#endif + DML_LOG_VERBOSE("to %u\n", DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]); l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - l->NextDETBufferPieceInKByte; DETPieceAssignedToThisSurfaceAlready[l->NextSurfaceToAssignDETPiece] = true; @@ -1274,13 +1229,11 @@ static void CalculateDETBufferSize( } *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByte / ConfigReturnBufferSegmentSizeInkByte; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: --- After bandwidth adjustment ---\n", __func__); - dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte); + DML_LOG_VERBOSE("DML::%s: --- After bandwidth adjustment ---\n", __func__); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte); for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { - dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); } -#endif } static double CalculateRequiredDispclk( @@ -1510,15 +1463,13 @@ static unsigned int dscceComputeDelay( //pixel delay is group_delay (converted to pixels) + pipeline, however, first group is a special case since it is processed as soon as it arrives (i.e., in 3 cycles regardless of pixel format) pixels = (group_delay - 1) * cycles_per_group + 3 + pipeline_delay; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: bpc: %u\n", __func__, bpc); - dml2_printf("DML::%s: BPP: %f\n", __func__, BPP); - dml2_printf("DML::%s: sliceWidth: %u\n", __func__, sliceWidth); - dml2_printf("DML::%s: numSlices: %u\n", __func__, numSlices); - dml2_printf("DML::%s: pixelFormat: %u\n", __func__, pixelFormat); - dml2_printf("DML::%s: Output: %u\n", __func__, Output); - dml2_printf("DML::%s: pixels: %u\n", __func__, pixels); -#endif + DML_LOG_VERBOSE("DML::%s: bpc: %u\n", __func__, bpc); + DML_LOG_VERBOSE("DML::%s: BPP: %f\n", __func__, BPP); + DML_LOG_VERBOSE("DML::%s: sliceWidth: %u\n", __func__, sliceWidth); + DML_LOG_VERBOSE("DML::%s: numSlices: %u\n", __func__, numSlices); + DML_LOG_VERBOSE("DML::%s: pixelFormat: %u\n", __func__, pixelFormat); + DML_LOG_VERBOSE("DML::%s: Output: %u\n", __func__, Output); + DML_LOG_VERBOSE("DML::%s: pixels: %u\n", __func__, pixels); return pixels; } @@ -1593,10 +1544,8 @@ static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, e // sft Delay = Delay + 1; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: pixelFormat = %u\n", __func__, pixelFormat); - dml2_printf("DML::%s: Delay = %u\n", __func__, Delay); -#endif + DML_LOG_VERBOSE("DML::%s: pixelFormat = %u\n", __func__, pixelFormat); + DML_LOG_VERBOSE("DML::%s: Delay = %u\n", __func__, Delay); return Delay; } @@ -1667,10 +1616,8 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ } meta_surface_bytes = (unsigned int)(p->DCCMetaPitch * vp_height_meta_ub * p->BytePerPixel / 256.0); -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch); - dml2_printf("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes); -#endif + 
DML_LOG_VERBOSE("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch); + DML_LOG_VERBOSE("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes); if (p->GPUVMEnable == true) { double meta_vmpg_bytes = 4.0 * 1024.0; *p->meta_pte_bytes_per_frame_ub = (unsigned int)((math_ceil2((double) (meta_surface_bytes - meta_vmpg_bytes) / (8 * meta_vmpg_bytes), 1) + 1) * 64); @@ -1724,25 +1671,23 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ vm_bytes = *p->meta_pte_bytes_per_frame_ub + extra_mpde_bytes + *p->dpde0_bytes_per_frame_ub + extra_dpde_bytes; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable); - dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); - dml2_printf("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear); - dml2_printf("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel); - dml2_printf("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels); - dml2_printf("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes); - dml2_printf("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes); - dml2_printf("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight); - dml2_printf("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth); - dml2_printf("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub); - dml2_printf("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub); - dml2_printf("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes); - dml2_printf("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes); - dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); - dml2_printf("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight); - dml2_printf("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth); - dml2_printf("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub); -#endif + DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable); + DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); + DML_LOG_VERBOSE("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear); + DML_LOG_VERBOSE("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel); + DML_LOG_VERBOSE("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels); + DML_LOG_VERBOSE("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes); + DML_LOG_VERBOSE("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes); + DML_LOG_VERBOSE("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight); + DML_LOG_VERBOSE("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth); + DML_LOG_VERBOSE("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub); + DML_LOG_VERBOSE("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub); + DML_LOG_VERBOSE("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes); + DML_LOG_VERBOSE("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes); + DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); + DML_LOG_VERBOSE("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight); + DML_LOG_VERBOSE("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth); + DML_LOG_VERBOSE("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub); if (p->SurfaceTiling == dml2_sw_linear) { *p->PixelPTEReqHeight = 1; @@ 
-1778,22 +1723,20 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel); if (p->GPUVMEnable == true) { - dml2_printf("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n", + DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n", __func__, p->GPUVMMinPageSizeKBytes, p->SurfaceTiling, dml_get_tile_block_size_bytes(p->SurfaceTiling)); - DML2_ASSERT(0); + DML_ASSERT(0); } } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); - dml2_printf("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight); - dml2_printf("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth); - dml2_printf("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear); - dml2_printf("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize); - dml2_printf("DML::%s: Pitch = %u\n", __func__, p->Pitch); - dml2_printf("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width); - dml2_printf("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height); -#endif + DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); + DML_LOG_VERBOSE("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight); + DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth); + DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear); + DML_LOG_VERBOSE("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize); + DML_LOG_VERBOSE("DML::%s: Pitch = %u\n", __func__, p->Pitch); + DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width); + DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height); *p->dpte_row_height_one_row_per_frame = vp_height_dpte_ub; *p->dpte_row_width_ub_one_row_per_frame = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height_one_row_per_frame / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * (double)*p->PixelPTEReqWidth); @@ -1811,7 +1754,7 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ *p->dpte_row_height_linear = 128; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *p->dpte_row_width_ub); + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *p->dpte_row_width_ub); #endif } else if (!dml_is_vertical_rotation(p->RotationAngle)) { @@ -1825,7 +1768,7 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqWidth, 1) + 1.0) * *p->PixelPTEReqWidth); } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub); + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub); #endif *p->PixelPTEBytesPerRow = *p->dpte_row_width_ub / *p->PixelPTEReqWidth * *p->PTERequestSize; @@ -1840,7 +1783,7 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqHeight * *p->PTERequestSize); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, 
*p->dpte_row_width_ub); + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub); #endif } @@ -1852,18 +1795,18 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ *p->PixelPTEBytesPerRowStorage = *p->PixelPTEBytesPerRow; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); - dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); - dml2_printf("DML::%s: meta_row_height = %u\n", __func__, *p->meta_row_height); - dml2_printf("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height); - dml2_printf("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear); - dml2_printf("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub); - dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow); - dml2_printf("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage); - dml2_printf("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests); - dml2_printf("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame); - dml2_printf("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame); - dml2_printf("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *p->PixelPTEBytesPerRow_one_row_per_frame); + DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); + DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); + DML_LOG_VERBOSE("DML::%s: meta_row_height = %u\n", __func__, *p->meta_row_height); + DML_LOG_VERBOSE("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height); + DML_LOG_VERBOSE("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear); + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage); + DML_LOG_VERBOSE("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests); + DML_LOG_VERBOSE("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame); + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *p->PixelPTEBytesPerRow_one_row_per_frame); #endif return vm_bytes; @@ -1894,12 +1837,12 @@ static unsigned int CalculatePrefetchSourceLines( double numLines = 0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio); - dml2_printf("DML::%s: VTaps = %u\n", __func__, VTaps); - dml2_printf("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart); - dml2_printf("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart); - dml2_printf("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary); - dml2_printf("DML::%s: SwathHeight = %u\n", __func__, SwathHeight); + DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); + DML_LOG_VERBOSE("DML::%s: VTaps = %u\n", __func__, VTaps); + DML_LOG_VERBOSE("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart); + DML_LOG_VERBOSE("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart); + 
DML_LOG_VERBOSE("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary); + DML_LOG_VERBOSE("DML::%s: SwathHeight = %u\n", __func__, SwathHeight); #endif if (ProgressiveToInterlaceUnitInOPP) *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1) / 2.0, 1)); @@ -1934,11 +1877,11 @@ static unsigned int CalculatePrefetchSourceLines( numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot); - dml2_printf("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill); - dml2_printf("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath); - dml2_printf("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath); - dml2_printf("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); + DML_LOG_VERBOSE("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot); + DML_LOG_VERBOSE("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill); + DML_LOG_VERBOSE("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath); + DML_LOG_VERBOSE("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath); + DML_LOG_VERBOSE("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); #endif return (unsigned int)(numLines); @@ -2007,8 +1950,8 @@ static void CalculateMALLUseForStaticScreen( if (is_using_mall_for_ss[k]) TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]); - dml2_printf("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL); + DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL); #endif } @@ -2022,7 +1965,7 @@ static void CalculateMALLUseForStaticScreen( (!CanAddAnotherSurfaceToMALL || SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { CanAddAnotherSurfaceToMALL = true; SurfaceToAddToMALL = k; - dml2_printf("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall); + DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall); } } if (CanAddAnotherSurfaceToMALL) { @@ -2030,8 +1973,8 @@ static void CalculateMALLUseForStaticScreen( TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL); - dml2_printf("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL); + DML_LOG_VERBOSE("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL); + DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL); #endif } } @@ -2203,15 +2146,15 @@ static void CalculateDCCConfiguration( segment_order_vert_contiguous_chroma = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled); - dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); - dml2_printf("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC); - dml2_printf("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l); - dml2_printf("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c); - dml2_printf("DML::%s: full_swath_bytes_horz_wc_l = %u\n", 
__func__, full_swath_bytes_horz_wc_l); - dml2_printf("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c); - dml2_printf("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma); - dml2_printf("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma); + DML_LOG_VERBOSE("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled); + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC); + DML_LOG_VERBOSE("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l); + DML_LOG_VERBOSE("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c); + DML_LOG_VERBOSE("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l); + DML_LOG_VERBOSE("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c); + DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma); + DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma); #endif if (DCCProgrammingAssumesScanDirectionUnknown == true) { if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { @@ -2301,12 +2244,12 @@ static void CalculateDCCConfiguration( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma); - dml2_printf("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma); - dml2_printf("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma); - dml2_printf("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma); - dml2_printf("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma); - dml2_printf("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma); + DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma); + DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma); + DML_LOG_VERBOSE("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma); + DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma); + DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma); + DML_LOG_VERBOSE("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma); #endif } @@ -2326,26 +2269,26 @@ static void calculate_mcache_row_bytes( unsigned int mvmpg_per_mcache; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: num_chans = %u\n", __func__, p->num_chans); - dml2_printf("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes); - dml2_printf("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes); - dml2_printf("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes); - dml2_printf("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); - dml2_printf("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes); - dml2_printf("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary); - dml2_printf("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode); - dml2_printf("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x); - dml2_printf("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y); - dml2_printf("DML::%s: full_vp_width = %u\n", __func__, 
p->full_vp_width); - dml2_printf("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height); - dml2_printf("DML::%s: blk_width = %u\n", __func__, p->blk_width); - dml2_printf("DML::%s: blk_height = %u\n", __func__, p->blk_height); - dml2_printf("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width); - dml2_printf("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height); - dml2_printf("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes); -#endif - DML2_ASSERT(p->mcache_line_size_bytes != 0); - DML2_ASSERT(p->mcache_size_bytes != 0); + DML_LOG_VERBOSE("DML::%s: num_chans = %u\n", __func__, p->num_chans); + DML_LOG_VERBOSE("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes); + DML_LOG_VERBOSE("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes); + DML_LOG_VERBOSE("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes); + DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes); + DML_LOG_VERBOSE("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary); + DML_LOG_VERBOSE("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode); + DML_LOG_VERBOSE("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x); + DML_LOG_VERBOSE("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y); + DML_LOG_VERBOSE("DML::%s: full_vp_width = %u\n", __func__, p->full_vp_width); + DML_LOG_VERBOSE("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height); + DML_LOG_VERBOSE("DML::%s: blk_width = %u\n", __func__, p->blk_width); + DML_LOG_VERBOSE("DML::%s: blk_height = %u\n", __func__, p->blk_height); + DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width); + DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height); + DML_LOG_VERBOSE("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes); +#endif + DML_ASSERT(p->mcache_line_size_bytes != 0); + DML_ASSERT(p->mcache_size_bytes != 0); *p->mvmpg_width = 0; *p->mvmpg_height = 0; @@ -2370,8 +2313,8 @@ static void calculate_mcache_row_bytes( *p->mvmpg_width = p->vmpg_width; *p->mvmpg_height = p->vmpg_height; } else if (!((blk_bytes == 65536) && (vmpg_bytes == 4096))) { - dml2_printf("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__); - DML2_ASSERT(0); + DML_LOG_VERBOSE("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__); + DML_ASSERT(0); } } @@ -2439,25 +2382,25 @@ static void calculate_mcache_row_bytes( *p->mvmpg_per_mcache_lb = (unsigned int)math_floor2(mvmpg_per_mcache, 1); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); - dml2_printf("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes); - dml2_printf("DML::%s: blk_bytes = %u\n", __func__, blk_bytes); - dml2_printf("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel); - dml2_printf("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub); - dml2_printf("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub); - dml2_printf("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width); - dml2_printf("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height); - dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_nom_overhead_factor); - dml2_printf("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor); + DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", 
__func__, p->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes); + DML_LOG_VERBOSE("DML::%s: blk_bytes = %u\n", __func__, blk_bytes); + DML_LOG_VERBOSE("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel); + DML_LOG_VERBOSE("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub); + DML_LOG_VERBOSE("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub); + DML_LOG_VERBOSE("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width); + DML_LOG_VERBOSE("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height); + DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_nom_overhead_factor); + DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor); #endif } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes); - dml2_printf("DML::%s: mcache_row_bytes_per_channel = %u\n", __func__, *p->mcache_row_bytes_per_channel); - dml2_printf("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches); + DML_LOG_VERBOSE("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes); + DML_LOG_VERBOSE("DML::%s: mcache_row_bytes_per_channel = %u\n", __func__, *p->mcache_row_bytes_per_channel); + DML_LOG_VERBOSE("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches); #endif - DML2_ASSERT(*p->num_mcaches > 0); + DML_ASSERT(*p->num_mcaches > 0); } static void calculate_mcache_setting( @@ -2523,7 +2466,7 @@ static void calculate_mcache_setting( l->l_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_l; calculate_mcache_row_bytes(scratch, &l->l_p); - DML2_ASSERT(*p->num_mcaches_l > 0); + DML_ASSERT(*p->num_mcaches_l > 0); if (l->is_dual_plane) { l->c_p.num_chans = p->num_chans; @@ -2559,7 +2502,7 @@ static void calculate_mcache_setting( l->c_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_c; calculate_mcache_row_bytes(scratch, &l->c_p); - DML2_ASSERT(*p->num_mcaches_c > 0); + DML_ASSERT(*p->num_mcaches_c > 0); } // Sharing for iMALL access @@ -2598,28 +2541,28 @@ static void calculate_mcache_setting( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: imall_enable = %u\n", __func__, p->imall_enable); - dml2_printf("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane); - dml2_printf("DML::%s: surf_vert = %u\n", __func__, p->surf_vert); - dml2_printf("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l); - dml2_printf("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l); - dml2_printf("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l); - dml2_printf("DML::%s: num_mcaches_l = %u\n", __func__, *p->num_mcaches_l); - dml2_printf("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l); - dml2_printf("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l); - dml2_printf("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l); + DML_LOG_VERBOSE("DML::%s: imall_enable = %u\n", __func__, p->imall_enable); + DML_LOG_VERBOSE("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane); + DML_LOG_VERBOSE("DML::%s: surf_vert = %u\n", __func__, p->surf_vert); + DML_LOG_VERBOSE("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l); + DML_LOG_VERBOSE("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l); + DML_LOG_VERBOSE("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l); + DML_LOG_VERBOSE("DML::%s: num_mcaches_l = %u\n", __func__, *p->num_mcaches_l); + 
DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l); + DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l); + DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l); if (l->is_dual_plane) { - dml2_printf("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c); - dml2_printf("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c); - dml2_printf("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c); - dml2_printf("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor); - dml2_printf("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c); - dml2_printf("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c); - dml2_printf("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c); - dml2_printf("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c); - dml2_printf("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size); - dml2_printf("DML::%s: lc_comb_mcache = %u\n", __func__, *p->lc_comb_mcache); + DML_LOG_VERBOSE("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c); + DML_LOG_VERBOSE("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c); + DML_LOG_VERBOSE("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c); + DML_LOG_VERBOSE("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor); + DML_LOG_VERBOSE("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c); + DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c); + DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c); + DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c); + DML_LOG_VERBOSE("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size); + DML_LOG_VERBOSE("DML::%s: lc_comb_mcache = %u\n", __func__, *p->lc_comb_mcache); } #endif // calculate split_coordinate @@ -2639,11 +2582,11 @@ static void calculate_mcache_setting( } #ifdef __DML_VBA_DEBUG__ for (n = 0; n < *p->num_mcaches_l; n++) - dml2_printf("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); + DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); if (l->is_dual_plane) { for (n = 0; n < *p->num_mcaches_c; n++) - dml2_printf("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); + DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); } #endif @@ -2660,10 +2603,10 @@ static void calculate_mcache_setting( #ifdef __DML_VBA_DEBUG__ for (n = 0; n < *p->num_mcaches_l; n++) - dml2_printf("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); + DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); for (n = 0; n < *p->num_mcaches_c; n++) - dml2_printf("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); + DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); #endif } @@ -2694,8 +2637,8 @@ static void calculate_mall_bw_overhead_factor( mall_prefetch_dram_overhead_factor[k] = 2.0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, k, mall_prefetch_sdp_overhead_factor[k]); - dml2_printf("DML::%s: k=%u, mall_prefetch_dram_overhead_factor 
= %f\n", __func__, k, mall_prefetch_dram_overhead_factor[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, k, mall_prefetch_sdp_overhead_factor[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_dram_overhead_factor = %f\n", __func__, k, mall_prefetch_dram_overhead_factor[k]); #endif } } @@ -2772,22 +2715,20 @@ static double dml_get_return_bandwidth_available( else // dml2_core_internal_bw_dram return_bw_mbps = derate_dram_bandwidth; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw); - dml2_printf("DML::%s: is_hvm_en = %u\n", __func__, is_hvm_en); - dml2_printf("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only); - dml2_printf("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type)); - dml2_printf("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type)); - dml2_printf("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); - dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); - dml2_printf("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth); - dml2_printf("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth); - dml2_printf("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth); - dml2_printf("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, derate_sdp_bandwidth, derate_sdp_factor); - dml2_printf("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, derate_fabric_bandwidth, derate_fabric_factor); - dml2_printf("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, derate_dram_bandwidth, derate_dram_factor); - dml2_printf("DML::%s: return_bw_mbps = %f\n", __func__, return_bw_mbps); -#endif + DML_LOG_VERBOSE("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw); + DML_LOG_VERBOSE("DML::%s: is_hvm_en = %u\n", __func__, is_hvm_en); + DML_LOG_VERBOSE("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only); + DML_LOG_VERBOSE("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type)); + DML_LOG_VERBOSE("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type)); + DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); + DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); + DML_LOG_VERBOSE("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth); + DML_LOG_VERBOSE("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth); + DML_LOG_VERBOSE("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth); + DML_LOG_VERBOSE("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, derate_sdp_bandwidth, derate_sdp_factor); + DML_LOG_VERBOSE("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, derate_fabric_bandwidth, derate_fabric_factor); + DML_LOG_VERBOSE("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, derate_dram_bandwidth, derate_dram_factor); + DML_LOG_VERBOSE("DML::%s: return_bw_mbps = %f\n", __func__, return_bw_mbps); return return_bw_mbps; } @@ -2807,9 +2748,9 @@ static noinline_for_stack void calculate_bandwidth_available( { unsigned int n, m; - dml2_printf("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); - dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); - dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, dram_bw_mbps); + DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); + DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); + DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", 
__func__, dram_bw_mbps); // Calculate all the bandwidth availabe for (m = 0; m < dml2_core_internal_soc_state_max; m++) { @@ -2828,8 +2769,8 @@ static noinline_for_stack void calculate_bandwidth_available( #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]); - dml2_printf("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]); #endif // urg_bandwidth_available_vm_only is indexed by soc_state @@ -2843,9 +2784,9 @@ static noinline_for_stack void calculate_bandwidth_available( urg_bandwidth_available_min[m] = math_min2(urg_bandwidth_available[m][dml2_core_internal_bw_dram], urg_bandwidth_available[m][dml2_core_internal_bw_sdp]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]); - dml2_printf("DML::%s: urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]); - dml2_printf("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[n]); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[n]); #endif } } @@ -2879,13 +2820,13 @@ static void calculate_avg_bandwidth_required( // SysActive and SVP Prefetch AVG bandwidth Check for (k = 0; k < num_active_planes; ++k) { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: plane %0d\n", __func__, k); - dml2_printf("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]); - dml2_printf("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]); - dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]); - dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p1[k]); - dml2_printf("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]); - dml2_printf("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]); + DML_LOG_VERBOSE("DML::%s: plane %0d\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]); + DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, 
dcc_dram_bw_nom_overhead_factor_p1[k]); + DML_LOG_VERBOSE("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]); + DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]); #endif sdp_overhead_factor = mall_prefetch_sdp_overhead_factor[k]; @@ -2902,10 +2843,10 @@ static void calculate_avg_bandwidth_required( avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); - dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); - dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); - dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); #endif } } @@ -3080,10 +3021,10 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, &p->MaxNumSwathY[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l); - dml2_printf("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c); - dml2_printf("DML::%s: k=%u, meta_row_bytes_per_row_ub_l = %u\n", 
__func__, k, s->meta_row_bytes_per_row_ub_l[k]); - dml2_printf("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l); + DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_l = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]); #endif p->vm_bytes[k] = (s->vm_bytes_l + s->vm_bytes_c) * (1 + 8 * s->HostVMDynamicLevels); p->meta_row_bytes[k] = s->meta_row_bytes_per_row_ub_l[k] + s->meta_row_bytes_per_row_ub_c[k]; @@ -3091,8 +3032,8 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, p->meta_row_bytes_per_row_ub_c[k] = s->meta_row_bytes_per_row_ub_c[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]); - dml2_printf("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]); #endif if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) { p->PTEBufferSizeNotExceeded[k] = true; @@ -3104,18 +3045,18 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]); #ifdef __DML_VBA_DEBUG__ if (p->PTEBufferSizeNotExceeded[k] == 0 || s->one_row_per_frame_fits_in_buffer[k] == 0) { - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]); - dml2_printf("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]); - dml2_printf("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]); - dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, 
PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); - dml2_printf("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]); - dml2_printf("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]); } #endif } @@ -3146,8 +3087,8 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { p->DCCMetaBufferSizeNotExceeded[k] = true; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]); - dml2_printf("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]); #endif p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) || (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle)); @@ -3170,9 +3111,9 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, p->DCCMetaBufferSizeNotExceeded[k] = false; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]); - dml2_printf("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes); - dml2_printf("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes); + DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]); #endif } @@ -3209,20 +3150,20 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, &p->dpte_row_bw[k], &p->meta_row_bw[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); - dml2_printf("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]); - dml2_printf("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config); - dml2_printf("DML::%s: k=%u, 
dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]); - dml2_printf("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); - dml2_printf("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]); - dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]); - dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); - dml2_printf("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable); - dml2_printf("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]); - dml2_printf("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]); #endif } } @@ -3257,19 +3198,19 @@ static double CalculateUrgentLatency( } #ifdef __DML_VBA_DEBUG__ if (qos_type == dml2_qos_param_type_dcn4x) { - dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); - dml2_printf("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles); - dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); - dml2_printf("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin); + DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin); } else { - 
dml2_printf("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly); - dml2_printf("DML::%s: UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData); - dml2_printf("DML::%s: UrgentLatencyVMDataOnly = %f\n", __func__, UrgentLatencyVMDataOnly); - dml2_printf("DML::%s: UrgentLatencyAdjustmentFabricClockComponent = %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent); - dml2_printf("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyVMDataOnly = %f\n", __func__, UrgentLatencyVMDataOnly); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockComponent = %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference); } - dml2_printf("DML::%s: FabricClock = %f\n", __func__, FabricClock); - dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, urgent_latency); + DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock); + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, urgent_latency); #endif return urgent_latency; } @@ -3296,18 +3237,18 @@ static double CalculateTripToMemory( #ifdef __DML_VBA_DEBUG__ if (qos_type == dml2_qos_param_type_dcn4x) { - dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); - dml2_printf("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles); - dml2_printf("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles); - dml2_printf("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles); - dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); - dml2_printf("DML::%s: FabricClock = %f\n", __func__, FabricClock); - dml2_printf("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin); - dml2_printf("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin); + DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles); + DML_LOG_VERBOSE("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles); + DML_LOG_VERBOSE("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock); + DML_LOG_VERBOSE("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin); + DML_LOG_VERBOSE("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin); } else { - dml2_printf("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); + DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); } - dml2_printf("DML::%s: trip_to_memory_us = %f\n", __func__, trip_to_memory_us); + DML_LOG_VERBOSE("DML::%s: trip_to_memory_us = %f\n", __func__, trip_to_memory_us); #endif @@ -3334,14 +3275,14 @@ static double CalculateMetaTripToMemory( #ifdef __DML_VBA_DEBUG__ if (qos_type == 
dml2_qos_param_type_dcn4x) { - dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); - dml2_printf("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles); - dml2_printf("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles); - dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles); + DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); } else { - dml2_printf("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); + DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); } - dml2_printf("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_to_memory_us); + DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_to_memory_us); #endif @@ -3358,7 +3299,6 @@ static void calculate_cursor_req_attributes( unsigned int *cursor_bytes_per_chunk, unsigned int *cursor_bytes) { - unsigned int cursor_pitch = 0; unsigned int cursor_bytes_per_req = 0; unsigned int cursor_width_bytes = 0; unsigned int cursor_height = 0; @@ -3366,10 +3306,6 @@ static void calculate_cursor_req_attributes( //SW determines the cursor pitch to support the maximum cursor_width that will be used but the following restrictions apply. //- For 2bpp, cursor_pitch = 256 pixels due to min cursor request size of 64B //- For 32 or 64 bpp, cursor_pitch = 64, 128 or 256 pixels depending on the cursor width - if (cursor_bpp == 2) - cursor_pitch = 256; - else - cursor_pitch = (unsigned int)1 << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1); //The cursor requestor uses a cursor request size of 64B, 128B, or 256B depending on the cursor_width and cursor_bpp as follows. 
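Note on the cursor_pitch removal above: the computation is not lost, it is folded into the debug print later in this hunk. As a self-contained illustration of the pitch rule spelled out in the comment, the following minimal C sketch performs the same arithmetic; it is not the patch's code. It substitutes standard libm ceil()/log2() for the driver's math_ceil2()/math_log() helpers so it compiles on its own, and the function name is made up for the example.

#include <math.h>

/*
 * Illustrative only: mirrors the cursor_pitch rule from the comment above.
 * The driver uses its own math_log()/math_ceil2() helpers; standard libm
 * calls are substituted here so the sketch stands alone.
 */
static unsigned int example_cursor_pitch(unsigned int cursor_bpp,
                                         unsigned int cursor_width)
{
        /* 2bpp cursors always use a 256-pixel pitch (64B minimum request). */
        if (cursor_bpp == 2)
                return 256;

        /* 32/64 bpp: pitch is the cursor width rounded up to a power of two. */
        return 1u << (unsigned int)ceil(log2((double)cursor_width));
}

For instance, a 32bpp cursor 96 pixels wide rounds up to a 128-pixel pitch, while any 2bpp cursor uses the fixed 256-pixel pitch.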
@@ -3409,8 +3345,8 @@ static void calculate_cursor_req_attributes( *cursor_lines_per_chunk = 1; } else { if (cursor_width > 0) { - dml2_printf("DML::%s: Invalid cursor_bpp = %d\n", __func__, cursor_bpp); - DML2_ASSERT(0); + DML_LOG_VERBOSE("DML::%s: Invalid cursor_bpp = %d\n", __func__, cursor_bpp); + DML_ASSERT(0); } } @@ -3421,15 +3357,15 @@ static void calculate_cursor_req_attributes( cursor_height = cursor_width; *cursor_bytes = *cursor_bytes_per_line * cursor_height; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: cursor_bpp = %d\n", __func__, cursor_bpp); - dml2_printf("DML::%s: cursor_width = %d\n", __func__, cursor_width); - dml2_printf("DML::%s: cursor_width_bytes = %d\n", __func__, cursor_width_bytes); - dml2_printf("DML::%s: cursor_bytes_per_req = %d\n", __func__, cursor_bytes_per_req); - dml2_printf("DML::%s: cursor_lines_per_chunk = %d\n", __func__, *cursor_lines_per_chunk); - dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, *cursor_bytes_per_line); - dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, *cursor_bytes_per_chunk); - dml2_printf("DML::%s: cursor_bytes = %d\n", __func__, *cursor_bytes); - dml2_printf("DML::%s: cursor_pitch = %d\n", __func__, cursor_pitch); + DML_LOG_VERBOSE("DML::%s: cursor_bpp = %d\n", __func__, cursor_bpp); + DML_LOG_VERBOSE("DML::%s: cursor_width = %d\n", __func__, cursor_width); + DML_LOG_VERBOSE("DML::%s: cursor_width_bytes = %d\n", __func__, cursor_width_bytes); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_req = %d\n", __func__, cursor_bytes_per_req); + DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %d\n", __func__, *cursor_lines_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, *cursor_bytes_per_line); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, *cursor_bytes_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_bytes = %d\n", __func__, *cursor_bytes); + DML_LOG_VERBOSE("DML::%s: cursor_pitch = %d\n", __func__, cursor_bpp == 2 ? 
256 : (unsigned int)1 << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1)); #endif } @@ -3460,13 +3396,13 @@ static void calculate_cursor_urgent_burst_factor( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: LinesInCursorBuffer = %u\n", __func__, LinesInCursorBuffer); - dml2_printf("DML::%s: CursorBufferSizeInTime = %f\n", __func__, CursorBufferSizeInTime); - dml2_printf("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize); - dml2_printf("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk); - dml2_printf("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk); - dml2_printf("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor); - dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); + DML_LOG_VERBOSE("DML::%s: LinesInCursorBuffer = %u\n", __func__, LinesInCursorBuffer); + DML_LOG_VERBOSE("DML::%s: CursorBufferSizeInTime = %f\n", __func__, CursorBufferSizeInTime); + DML_LOG_VERBOSE("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk); + DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor); + DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); #endif } @@ -3501,15 +3437,15 @@ static void CalculateUrgentBurstFactor( *UrgentBurstFactorChroma = 0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio); - dml2_printf("DML::%s: VRatioC = %f\n", __func__, VRatioC); - dml2_printf("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY); - dml2_printf("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC); - dml2_printf("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY); - dml2_printf("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); - dml2_printf("DML::%s: LineTime = %f\n", __func__, LineTime); + DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); + DML_LOG_VERBOSE("DML::%s: VRatioC = %f\n", __func__, VRatioC); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC); + DML_LOG_VERBOSE("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY); + DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime); #endif - DML2_ASSERT(VRatio > 0); + DML_ASSERT(VRatio > 0); LinesInDETLuma = (dml_is_phantom_pipe(plane_cfg) ? 
1024 * 1024 : DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; @@ -3534,12 +3470,12 @@ static void CalculateUrgentBurstFactor( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: LinesInDETLuma = %f\n", __func__, LinesInDETLuma); - dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); - dml2_printf("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, DETBufferSizeInTimeLuma); - dml2_printf("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma); - dml2_printf("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma); - dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); + DML_LOG_VERBOSE("DML::%s: LinesInDETLuma = %f\n", __func__, LinesInDETLuma); + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, DETBufferSizeInTimeLuma); + DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma); + DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma); + DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); #endif } @@ -3600,10 +3536,10 @@ static void CalculateDCFCLKDeepSleepTdlut( if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && tdlut_bytes_to_deliver[k] > 0) { double tdlut_required_deepsleep_dcfclk = (double) tdlut_bytes_to_deliver[k] / 64.0 / prefetch_swath_time_us[k]; - dml2_printf("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); - dml2_printf("DML::%s: k=%d, tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]); - dml2_printf("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]); - dml2_printf("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk); + DML_LOG_VERBOSE("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk); // increase the deepsleep dcfclk to match the original dispclk throughput rate if (tdlut_required_deepsleep_dcfclk > DCFClkDeepSleepPerSurface[k]) { @@ -3613,8 +3549,8 @@ static void CalculateDCFCLKDeepSleepTdlut( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz); - dml2_printf("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz); + DML_LOG_VERBOSE("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); #endif } @@ -3625,17 +3561,17 @@ static void CalculateDCFCLKDeepSleepTdlut( *DCFClkDeepSleep = math_max2(8.0, __DML2_CALCS_DCFCLK_FACTOR__ * ReadBandwidth / (double)ReturnBusWidth); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__); - dml2_printf("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); - dml2_printf("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth); - dml2_printf("DML::%s: DCFClkDeepSleep = %f\n", 
__func__, *DCFClkDeepSleep); + DML_LOG_VERBOSE("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__); + DML_LOG_VERBOSE("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); + DML_LOG_VERBOSE("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth); + DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); #endif for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { *DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); } - dml2_printf("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); + DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); } static noinline_for_stack void CalculateDCFCLKDeepSleep( @@ -3731,12 +3667,12 @@ static unsigned int CalculateMaxVStartup( else max_vstartup_lines = vblank_size - (unsigned int)math_max2(1.0, math_ceil2(write_back_delay_us / line_time_us, 1.0)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VBlankNom = %u\n", __func__, timing->vblank_nom); - dml2_printf("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us); - dml2_printf("DML::%s: line_time_us = %f\n", __func__, line_time_us); - dml2_printf("DML::%s: vblank_actual = %u\n", __func__, vblank_actual); - dml2_printf("DML::%s: vblank_avail = %u\n", __func__, vblank_avail); - dml2_printf("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines); + DML_LOG_VERBOSE("DML::%s: VBlankNom = %lu\n", __func__, timing->vblank_nom); + DML_LOG_VERBOSE("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us); + DML_LOG_VERBOSE("DML::%s: line_time_us = %f\n", __func__, line_time_us); + DML_LOG_VERBOSE("DML::%s: vblank_actual = %u\n", __func__, vblank_actual); + DML_LOG_VERBOSE("DML::%s: vblank_avail = %u\n", __func__, vblank_avail); + DML_LOG_VERBOSE("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines); #endif max_vstartup_lines = (unsigned int)math_min2(max_vstartup_lines, DML_MAX_VSTARTUP_START); return max_vstartup_lines; @@ -3761,9 +3697,9 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch const long MAXIMUMCOMPRESSION = 4; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP); + DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP); for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - dml2_printf("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]); } #endif CalculateSwathWidth( @@ -3797,15 +3733,15 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch p->full_swath_bytes_l[k] = (unsigned int)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]); p->full_swath_bytes_c[k] = (unsigned int)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]); - dml2_printf("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]); - dml2_printf("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]); - dml2_printf("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]); - dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); - dml2_printf("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, 
p->swath_width_chroma_ub[k]); - dml2_printf("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]); - dml2_printf("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]); - dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); #endif if (p->display_cfg->plane_descriptors[k].pixel_format == dml2_420_10) { p->full_swath_bytes_l[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_l[k], 256)); @@ -3848,11 +3784,11 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch p->CompressedBufferSizeInkByte); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP); - dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte); - dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte); - dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled); - dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte); + DML_LOG_VERBOSE("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP); + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte); #endif *p->ViewportSizeSupport = true; @@ -3860,7 +3796,7 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch DETBufferSizeInKByteForSwathCalculation = (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 
1024 : p->DETBufferSizeInKByte[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); #endif if (p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) { p->SwathHeightY[k] = MaximumSwathHeightY[k]; @@ -3917,13 +3853,13 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch if ((p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) || p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) { *p->ViewportSizeSupport = false; - dml2_printf("DML::%s: k=%u full_swath_bytes_l=%u\n", __func__, k, p->full_swath_bytes_l[k]); - dml2_printf("DML::%s: k=%u full_swath_bytes_c=%u\n", __func__, k, p->full_swath_bytes_c[k]); - dml2_printf("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation=%u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); - dml2_printf("DML::%s: k=%u SwathWidth=%u\n", __func__, k, p->SwathWidth[k]); - dml2_printf("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, p->MaximumSwathWidthLuma[k]); - dml2_printf("DML::%s: k=%u SwathWidthChroma=%d\n", __func__, k, p->SwathWidthChroma[k]); - dml2_printf("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, p->MaximumSwathWidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l=%u\n", __func__, k, p->full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c=%u\n", __func__, k, p->full_swath_bytes_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation=%u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidth=%u\n", __func__, k, p->SwathWidth[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, p->MaximumSwathWidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthChroma=%d\n", __func__, k, p->SwathWidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, p->MaximumSwathWidthChroma[k]); p->ViewportSizeSupportPerSurface[k] = false; } else { p->ViewportSizeSupportPerSurface[k] = true; @@ -3931,35 +3867,35 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch if (p->SwathHeightC[k] == 0) { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k); #endif p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024; p->DETBufferSizeC[k] = 0; } else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k); #endif p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; } else { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, 2/3 DET will be used for plane0, and 1/3 for plane1\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%u, 2/3 DET will be used for plane0, and 1/3 for plane1\n", __func__, k); #endif p->DETBufferSizeY[k] = (unsigned int)(math_floor2(p->DETBufferSizeInKByte[k] 
* 1024 * 2 / 3, 1024)); p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k]; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); - dml2_printf("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]); - dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); - dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); - dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]); - dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); - dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]); - dml2_printf("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); - dml2_printf("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]); - dml2_printf("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]); #endif } @@ -3969,12 +3905,12 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b, (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / (p->mrq_present ? 
MAXIMUMCOMPRESSION : 1) / 64)), 1.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]); - dml2_printf("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes); + DML_LOG_VERBOSE("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]); + DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes); #endif } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b); + DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b); #endif *p->hw_debug5 = false; @@ -3989,12 +3925,12 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k]))) *p->hw_debug5 = true; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); - dml2_printf("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); - dml2_printf("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); - dml2_printf("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); - dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); - dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); + DML_LOG_VERBOSE("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); + DML_LOG_VERBOSE("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); + DML_LOG_VERBOSE("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); + DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); #endif } #endif @@ -4192,15 +4128,15 @@ static noinline_for_stack void CalculateODMMode( SurfaceRequiredDISPCLKWithODMCombineThreeToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock); SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: ODMUse = %d\n", __func__, ODMUse); - dml2_printf("DML::%s: Output = %d\n", __func__, Output); - dml2_printf("DML::%s: DSCEnable = %d\n", __func__, DSCEnable); - dml2_printf("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk); - dml2_printf("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit); - dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, SurfaceRequiredDISPCLKWithoutODMCombine); - dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineTwoToOne); - dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineThreeToOne); - dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineFourToOne); + 
DML_LOG_VERBOSE("DML::%s: ODMUse = %d\n", __func__, ODMUse); + DML_LOG_VERBOSE("DML::%s: Output = %d\n", __func__, Output); + DML_LOG_VERBOSE("DML::%s: DSCEnable = %d\n", __func__, DSCEnable); + DML_LOG_VERBOSE("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk); + DML_LOG_VERBOSE("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, SurfaceRequiredDISPCLKWithoutODMCombine); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineTwoToOne); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineThreeToOne); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineFourToOne); #endif if (ODMUse == dml2_odm_mode_auto) DecidedODMMode = DecideODMMode(HActive, @@ -4245,10 +4181,10 @@ static noinline_for_stack void CalculateODMMode( *NumberOfDPP = NumberOfDPPRequired; *RequiredDISPCLKPerSurface = success ? DISPCLKRequired : 0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: ODMMode = %d\n", __func__, *ODMMode); - dml2_printf("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP); - dml2_printf("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport); - dml2_printf("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface); + DML_LOG_VERBOSE("DML::%s: ODMMode = %d\n", __func__, *ODMMode); + DML_LOG_VERBOSE("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP); + DML_LOG_VERBOSE("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport); + DML_LOG_VERBOSE("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface); #endif } @@ -4292,17 +4228,17 @@ static noinline_for_stack void CalculateOutputLink( *OutputRate = dml2_core_internal_output_rate_unknown; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable); - dml2_printf("DML::%s: PHYCLK = %f\n", __func__, PHYCLK); - dml2_printf("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); - dml2_printf("DML::%s: AudioSampleRate = %f\n", __func__, AudioSampleRate); - dml2_printf("DML::%s: HActive = %u\n", __func__, HActive); - dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal); - dml2_printf("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC); - dml2_printf("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC); - dml2_printf("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP); - dml2_printf("DML::%s: Output (encoder) = %u\n", __func__, Output); - dml2_printf("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate); + DML_LOG_VERBOSE("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable); + DML_LOG_VERBOSE("DML::%s: PHYCLK = %f\n", __func__, PHYCLK); + DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); + DML_LOG_VERBOSE("DML::%s: AudioSampleRate = %f\n", __func__, AudioSampleRate); + DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive); + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); + DML_LOG_VERBOSE("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC); + DML_LOG_VERBOSE("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC); + DML_LOG_VERBOSE("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP); + 
DML_LOG_VERBOSE("DML::%s: Output (encoder) = %u\n", __func__, Output); + DML_LOG_VERBOSE("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate); #endif { if (Output == dml2_hdmi) { @@ -4487,9 +4423,9 @@ static noinline_for_stack void CalculateOutputLink( } } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC); - dml2_printf("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC); - dml2_printf("DML::%s: OutBpp = %f\n", __func__, *OutBpp); + DML_LOG_VERBOSE("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC); + DML_LOG_VERBOSE("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC); + DML_LOG_VERBOSE("DML::%s: OutBpp = %f\n", __func__, *OutBpp); #endif } @@ -4571,17 +4507,17 @@ static unsigned int DSCDelayRequirement( DSCDelayRequirement_val = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled); - dml2_printf("DML::%s: ODMMode = %u\n", __func__, ODMMode); - dml2_printf("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); - dml2_printf("DML::%s: HActive = %u\n", __func__, HActive); - dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal); - dml2_printf("DML::%s: PixelClock = %f\n", __func__, PixelClock); - dml2_printf("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); - dml2_printf("DML::%s: OutputFormat = %u\n", __func__, OutputFormat); - dml2_printf("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent); - dml2_printf("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices); - dml2_printf("DML::%s: DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val); + DML_LOG_VERBOSE("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled); + DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, ODMMode); + DML_LOG_VERBOSE("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); + DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive); + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); + DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock); + DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); + DML_LOG_VERBOSE("DML::%s: OutputFormat = %u\n", __func__, OutputFormat); + DML_LOG_VERBOSE("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent); + DML_LOG_VERBOSE("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices); + DML_LOG_VERBOSE("DML::%s: DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val); #endif return DSCDelayRequirement_val; @@ -4654,10 +4590,10 @@ static void CalculateSurfaceSizeInMall( (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024); - dml2_printf("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP); - dml2_printf("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS); - dml2_printf("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize); + DML_LOG_VERBOSE("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024); + DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP); + DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS); + DML_LOG_VERBOSE("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize); #endif } @@ -4674,7 +4610,6 @@ static void calculate_tdlut_setting( unsigned int 
tdlut_vmpg_per_frame; unsigned int tdlut_pte_req_per_frame; unsigned int tdlut_bytes_per_line; - unsigned int tdlut_delivery_cycles; double tdlut_drain_rate; unsigned int tdlut_mpc_width; unsigned int tdlut_bytes_per_group_simple; @@ -4737,13 +4672,13 @@ static void calculate_tdlut_setting( *p->tdlut_bytes_per_frame = tdlut_bytes_per_line * tdlut_mpc_width * tdlut_mpc_width; *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width; //the delivery cycles is DispClk cycles per line * number of lines * number of slices - tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; + //tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1); } else { //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); *p->tdlut_bytes_per_group = tdlut_bytes_per_group_simple; - tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_width/2.0, 1); + //tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_width/2.0, 1); tdlut_drain_rate = 2 * tdlut_bpe * p->dispclk_mhz; } @@ -4756,25 +4691,25 @@ static void calculate_tdlut_setting( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable); - dml2_printf("DML::%s: vmpg_bytes = %d\n", __func__, vmpg_bytes); - dml2_printf("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, tdlut_vmpg_per_frame); - dml2_printf("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, tdlut_pte_req_per_frame); + DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %d\n", __func__, vmpg_bytes); + DML_LOG_VERBOSE("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, tdlut_vmpg_per_frame); + DML_LOG_VERBOSE("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, tdlut_pte_req_per_frame); - dml2_printf("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz); - dml2_printf("DML::%s: tdlut_width = %u\n", __func__, tdlut_width); - dml2_printf("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? 
"sw_linear" : "simple_linear"); - dml2_printf("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes); - dml2_printf("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes); - dml2_printf("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame); - dml2_printf("DML::%s: tdlut_bytes_per_line = %u\n", __func__, tdlut_bytes_per_line); - dml2_printf("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group); - dml2_printf("DML::%s: tdlut_drain_rate = %f\n", __func__, tdlut_drain_rate); - dml2_printf("DML::%s: tdlut_delivery_cycles = %u\n", __func__, tdlut_delivery_cycles); - dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time); - dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time); - dml2_printf("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver); - dml2_printf("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub); + DML_LOG_VERBOSE("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz); + DML_LOG_VERBOSE("DML::%s: tdlut_width = %u\n", __func__, tdlut_width); + DML_LOG_VERBOSE("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear"); + DML_LOG_VERBOSE("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes); + DML_LOG_VERBOSE("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_line = %u\n", __func__, tdlut_bytes_per_line); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group); + DML_LOG_VERBOSE("DML::%s: tdlut_drain_rate = %f\n", __func__, tdlut_drain_rate); + DML_LOG_VERBOSE("DML::%s: tdlut_delivery_cycles = %u\n", __func__, p->tdlut_addressing_mode == dml2_tdlut_sw_linear ? 
(unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width : (unsigned int)math_ceil2(tdlut_width/2.0, 1)); + DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time); + DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver); + DML_LOG_VERBOSE("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub); #endif } @@ -4820,10 +4755,10 @@ static void CalculateTarb( *Tarb = extra_bytes / ReturnBW; *Tarb_prefetch = extra_bytes_prefetch / ReturnBW; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte); - dml2_printf("DML::%s: MetaChunkSize = %d\n", __func__, MetaChunkSize); - dml2_printf("DML::%s: extra_bytes = %f\n", __func__, extra_bytes); - dml2_printf("DML::%s: extra_bytes_prefetch = %f\n", __func__, extra_bytes_prefetch); + DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte); + DML_LOG_VERBOSE("DML::%s: MetaChunkSize = %d\n", __func__, MetaChunkSize); + DML_LOG_VERBOSE("DML::%s: extra_bytes = %f\n", __func__, extra_bytes); + DML_LOG_VERBOSE("DML::%s: extra_bytes_prefetch = %f\n", __func__, extra_bytes_prefetch); #endif } @@ -4838,10 +4773,10 @@ static double CalculateTWait( TWait = math_max2(reserved_vblank_time_ns/1000.0, g6_temp_read_blackout_us) + t_urg_trip; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: reserved_vblank_time_ns = %d\n", __func__, reserved_vblank_time_ns); - dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); - dml2_printf("DML::%s: Ttrip = %f\n", __func__, Ttrip); - dml2_printf("DML::%s: TWait = %f\n", __func__, TWait); + DML_LOG_VERBOSE("DML::%s: reserved_vblank_time_ns = %ld\n", __func__, reserved_vblank_time_ns); + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); + DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, Ttrip); + DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, TWait); #endif return TWait; } @@ -4887,20 +4822,20 @@ static void CalculateVUpdateAndDynamicMetadataParameters( *Tdmsks = *Tdmsks / 2; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired); - dml2_printf("DML::%s: VBlank = %u\n", __func__, VBlank); - dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal); - dml2_printf("DML::%s: PixelClock = %f\n", __func__, PixelClock); - dml2_printf("DML::%s: Dppclk = %f\n", __func__, Dppclk); - dml2_printf("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep); - dml2_printf("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters); - dml2_printf("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime); + DML_LOG_VERBOSE("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired); + DML_LOG_VERBOSE("DML::%s: VBlank = %u\n", __func__, VBlank); + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); + DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock); + DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, Dppclk); + DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep); + DML_LOG_VERBOSE("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters); + DML_LOG_VERBOSE("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime); - 
dml2_printf("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix); - dml2_printf("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix); - dml2_printf("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix); + DML_LOG_VERBOSE("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix); + DML_LOG_VERBOSE("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix); + DML_LOG_VERBOSE("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix); - dml2_printf("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); + DML_LOG_VERBOSE("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); #endif } @@ -4962,11 +4897,11 @@ static double get_urgent_bandwidth_required( l->adj_factor_cur_pre = UrgentBurstFactorCursorPre[k]; bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]); - bool exclude_this_plane = 0; + bool exclude_this_plane = false; // Exclude phantom pipe in bw calculation for non svp prefetch state if (state_type != dml2_core_internal_soc_state_svp_prefetch && is_phantom) - exclude_this_plane = 1; + exclude_this_plane = true; // The qualified row bandwidth, qual_row_bw, accounts for the regular non-flip row bandwidth when there is no possible immediate flip or HostVM invalidation flip. // The qual_row_bw is zero if HostVM is possible and only non-zero and equal to row_bw(i) if immediate flip is not allowed for that pipe. @@ -4995,12 +4930,12 @@ static double get_urgent_bandwidth_required( surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw); - dml2_printf("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw); - dml2_printf("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw); - dml2_printf("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw); - dml2_printf("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]); - dml2_printf("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]); #endif } else { surface_required_bw[k] = 0.0; @@ -5009,34 +4944,34 @@ static double get_urgent_bandwidth_required( l->required_bandwidth_mbps += surface_required_bw[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]); - dml2_printf("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw); - dml2_printf("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); - dml2_printf("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor); - dml2_printf("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0); - dml2_printf("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1); - dml2_printf("DML::%s: k=%d, 
adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur); + DML_LOG_VERBOSE("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); + DML_LOG_VERBOSE("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur); - dml2_printf("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre); - dml2_printf("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre); - dml2_printf("DML::%s: k=%d, adj_factor_cur_pre=%f\n", __func__, k, l->adj_factor_cur_pre); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur_pre=%f\n", __func__, k, l->adj_factor_cur_pre); - dml2_printf("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]); - dml2_printf("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]); - dml2_printf("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]); - dml2_printf("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]); - dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]); - dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]); - dml2_printf("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]); - dml2_printf("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]); - dml2_printf("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]); - dml2_printf("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]); - dml2_printf("DML::%s: k=%d, PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]); - dml2_printf("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]); - dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane); - dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), soc_state=%s, inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, dml2_core_internal_soc_state_type_str(state_type), inc_flip_bw, is_phantom, 
exclude_this_plane); - dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane); + DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane); + DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), soc_state=%s, inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, dml2_core_internal_soc_state_type_str(state_type), inc_flip_bw, is_phantom, exclude_this_plane); + DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane); #endif } @@ -5120,19 +5055,19 @@ static void CalculateExtraLatency( *ExtraLatency_sr = *ExtraLatency_sr + Tarb; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: qos_type=%u\n", __func__, qos_type); - dml2_printf("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode); - dml2_printf("DML::%s: Tex_trips=%u\n", __func__, Tex_trips); - dml2_printf("DML::%s: max_outstanding_when_urgent_expected=%u\n", __func__, max_outstanding_when_urgent_expected); - dml2_printf("DML::%s: FabricClock=%f\n", __func__, FabricClock); - dml2_printf("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); - dml2_printf("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); - dml2_printf("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); - dml2_printf("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes); - dml2_printf("DML::%s: Tarb=%f\n", __func__, Tarb); - dml2_printf("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); - dml2_printf("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); - dml2_printf("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch); + DML_LOG_VERBOSE("DML::%s: qos_type=%u\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode); + DML_LOG_VERBOSE("DML::%s: Tex_trips=%f\n", __func__, Tex_trips); + DML_LOG_VERBOSE("DML::%s: max_outstanding_when_urgent_expected=%u\n", __func__, max_outstanding_when_urgent_expected); + DML_LOG_VERBOSE("DML::%s: FabricClock=%f\n", __func__, FabricClock); + DML_LOG_VERBOSE("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); + DML_LOG_VERBOSE("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); + DML_LOG_VERBOSE("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); + DML_LOG_VERBOSE("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes); + DML_LOG_VERBOSE("DML::%s: Tarb=%f\n", __func__, Tarb); + DML_LOG_VERBOSE("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); + DML_LOG_VERBOSE("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); + DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch); #endif } @@ -5199,20 +5134,20 @@ static bool 
CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->HostVMDynamicLevelsTrips = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable); - dml2_printf("DML::%s: mrq_present = %u\n", __func__, p->mrq_present); - dml2_printf("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable); - dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable); - dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); - dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable); - dml2_printf("DML::%s: VStartup = %u\n", __func__, p->VStartup); - dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable); - dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); - dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait); - dml2_printf("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); - dml2_printf("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); - dml2_printf("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk); - dml2_printf("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk); + DML_LOG_VERBOSE("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable); + DML_LOG_VERBOSE("DML::%s: mrq_present = %u\n", __func__, p->mrq_present); + DML_LOG_VERBOSE("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable); + DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); + DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable); + DML_LOG_VERBOSE("DML::%s: VStartup = %u\n", __func__, p->VStartup); + DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait); + DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); + DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); + DML_LOG_VERBOSE("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk); + DML_LOG_VERBOSE("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk); #endif CalculateVUpdateAndDynamicMetadataParameters( p->MaxInterDCNTileRepeaters, @@ -5258,11 +5193,11 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch if (p->DynamicMetadataEnable == true) { if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) { *p->NotEnoughTimeForDynamicMetadata = true; - dml2_printf("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); - dml2_printf("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); - dml2_printf("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); - dml2_printf("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); - dml2_printf("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); + DML_LOG_VERBOSE("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); + DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); + DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); + DML_LOG_VERBOSE("DML::%s: 
Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); + DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); } else { *p->NotEnoughTimeForDynamicMetadata = false; } @@ -5288,21 +5223,21 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch ((p->myPipe->ODMMode == dml2_odm_mode_mso_1to4) ? (double)p->myPipe->HActive * 3.0 / 4.0 : 0)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled); - dml2_printf("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles); - dml2_printf("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock); - dml2_printf("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk); - dml2_printf("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles); - dml2_printf("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk); - dml2_printf("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay); - dml2_printf("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode); - dml2_printf("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH); - dml2_printf("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler); + DML_LOG_VERBOSE("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled); + DML_LOG_VERBOSE("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles); + DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock); + DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk); + DML_LOG_VERBOSE("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles); + DML_LOG_VERBOSE("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk); + DML_LOG_VERBOSE("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay); + DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode); + DML_LOG_VERBOSE("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH); + DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler); - dml2_printf("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut); - dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time); - dml2_printf("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame); - dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, p->tdlut_drain_time); + DML_LOG_VERBOSE("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut); + DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time); + DML_LOG_VERBOSE("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame); + DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, p->tdlut_drain_time); #endif if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP)) @@ -5314,17 +5249,17 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->DSTYAfterScaler = (unsigned int)(math_floor2(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1)); *p->DSTXAfterScaler = (unsigned int)(s->DSTTotalPixelsAfterScaler - ((double)(*p->DSTYAfterScaler * p->myPipe->HTotal))); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler); - dml2_printf("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler); + DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler); + DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler = %u 
(final)\n", __func__, *p->DSTYAfterScaler); #endif #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); - dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); - dml2_printf("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); - dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); - dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); - dml2_printf("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips); + DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); + DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); + DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); + DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); + DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); + DML_LOG_VERBOSE("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips); #endif if (p->display_cfg->gpuvm_enable) { s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; @@ -5402,7 +5337,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } /* oto prefetch bw should be always be less than total vactive bw */ - //DML2_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface); + //DML_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface); s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor; @@ -5421,9 +5356,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->RequiredPrefetchBWOTO = s->prefetch_bw_oto; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l); - dml2_printf("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c); - dml2_printf("DML::%s: per_pipe_vactive_sw_bw = %f\n", __func__, s->per_pipe_vactive_sw_bw); + DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l); + DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c); + DML_LOG_VERBOSE("DML::%s: per_pipe_vactive_sw_bw = %f\n", __func__, s->per_pipe_vactive_sw_bw); #endif if (p->display_cfg->gpuvm_enable == true) { @@ -5433,9 +5368,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->LineTime / 4.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips); - dml2_printf("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto); - dml2_printf("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0); + DML_LOG_VERBOSE("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips); + DML_LOG_VERBOSE("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto); + DML_LOG_VERBOSE("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0); #endif } else { s->Tvm_oto = s->Tvm_trips_rounded; @@ -5447,9 +5382,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto, s->LineTime / 4.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Tr0_oto max0 = %f\n", 
__func__, *p->Tr0_trips); - dml2_printf("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto); - dml2_printf("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4); + DML_LOG_VERBOSE("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips); + DML_LOG_VERBOSE("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto); + DML_LOG_VERBOSE("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4); #endif } else s->Tr0_oto = s->LineTime / 4.0; @@ -5459,11 +5394,11 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto; #ifdef DML_GLOBAL_PREFETCH_CHECK - dml2_printf("DML::%s: impacted_Tpre = %f\n", __func__, p->impacted_dst_y_pre); + DML_LOG_VERBOSE("DML::%s: impacted_Tpre = %f\n", __func__, p->impacted_dst_y_pre); if (p->impacted_dst_y_pre > 0) { - dml2_printf("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); s->dst_y_prefetch_oto = math_max2(s->dst_y_prefetch_oto, p->impacted_dst_y_pre); - dml2_printf("DML::%s: dst_y_prefetch_oto = %f (impacted)\n", __func__, s->dst_y_prefetch_oto); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f (impacted)\n", __func__, s->dst_y_prefetch_oto); } #endif *p->Tpre_oto = s->dst_y_prefetch_oto * s->LineTime; @@ -5492,72 +5427,71 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); - dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); - dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); - dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip); - dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); - dml2_printf("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); - dml2_printf("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor); - dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); - dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); - dml2_printf("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); - dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC); - dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); - dml2_printf("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub); - dml2_printf("DML::%s: prefetch_sw_bytes = %f\n", __func__, *p->prefetch_sw_bytes); - dml2_printf("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw); - dml2_printf("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp); - dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); - dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); - dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); - dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); - dml2_printf("DML::%s: Tr0_trips = 
%f\n", __func__, *p->Tr0_trips); - dml2_printf("DML::%s: Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip); - dml2_printf("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip); - dml2_printf("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr); - dml2_printf("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto); - dml2_printf("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto); - dml2_printf("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto); - dml2_printf("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines); - dml2_printf("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines); - dml2_printf("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto); - dml2_printf("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); - dml2_printf("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ); - dml2_printf("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes); - dml2_printf("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes); -#endif - double Tpre = s->dst_y_prefetch_equ * s->LineTime; + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); + DML_LOG_VERBOSE("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); + DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); + DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip); + DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); + DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); + DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor); + DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); + DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); + DML_LOG_VERBOSE("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub); + DML_LOG_VERBOSE("DML::%s: prefetch_sw_bytes = %f\n", __func__, *p->prefetch_sw_bytes); + DML_LOG_VERBOSE("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw); + DML_LOG_VERBOSE("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp); + DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); + DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip); + DML_LOG_VERBOSE("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr); + DML_LOG_VERBOSE("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto); + DML_LOG_VERBOSE("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto); + DML_LOG_VERBOSE("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto); + DML_LOG_VERBOSE("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines); + DML_LOG_VERBOSE("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines); 
+ DML_LOG_VERBOSE("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ); + DML_LOG_VERBOSE("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes); + DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes); +#endif s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0; *p->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); - dml2_printf("DML::%s: LineTime: %f\n", __func__, s->LineTime); - dml2_printf("DML::%s: VStartup: %u\n", __func__, p->VStartup); - dml2_printf("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime); - dml2_printf("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup); - dml2_printf("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc); - dml2_printf("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait); - dml2_printf("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); - dml2_printf("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); - dml2_printf("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); - dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait); - dml2_printf("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); - dml2_printf("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); - dml2_printf("DML::%s: Tex = %f\n", __func__, p->ExtraLatencyPrefetch); - dml2_printf("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm); - dml2_printf("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); - dml2_printf("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p); - dml2_printf("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip); - dml2_printf("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler); - dml2_printf("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler); - dml2_printf("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor); - dml2_printf("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes); - dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, *p->Tpre_rounded, (*p->Tpre_rounded - Tpre)); - dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); + DML_LOG_VERBOSE("DML::%s: LineTime: %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: VStartup: %u\n", __func__, p->VStartup); + DML_LOG_VERBOSE("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime); + 
DML_LOG_VERBOSE("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup); + DML_LOG_VERBOSE("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc); + DML_LOG_VERBOSE("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait); + DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); + DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); + DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); + DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait); + DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); + DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); + DML_LOG_VERBOSE("DML::%s: Tex = %f\n", __func__, p->ExtraLatencyPrefetch); + DML_LOG_VERBOSE("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm); + DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); + DML_LOG_VERBOSE("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p); + DML_LOG_VERBOSE("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip); + DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler); + DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler); + DML_LOG_VERBOSE("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes); + DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, (s->dst_y_prefetch_equ * s->LineTime), *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime))); + DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); #endif *p->dst_y_per_vm_vblank = 0; @@ -5596,19 +5530,19 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else s->prefetch_bw1 = 0; - dml2_printf("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1); + DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1); if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes))); - dml2_printf("DML::%s: Tpre_rounded = %f\n", __func__, *p->Tpre_rounded); - dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); - dml2_printf("DML::%s: min_Lsw_equ = %f\n", 
__func__, s->min_Lsw_equ); - dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); - dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); - dml2_printf("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1); + DML_LOG_VERBOSE("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes))); + DML_LOG_VERBOSE("DML::%s: Tpre_rounded = %f\n", __func__, *p->Tpre_rounded); + DML_LOG_VERBOSE("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); + DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1); #endif } @@ -5620,10 +5554,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else s->prefetch_bw2 = 0; - dml2_printf("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2); + DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2); if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) { s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime); - dml2_printf("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2); + DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2); } // prefetch_bw3: 2*R0 + SW @@ -5634,10 +5568,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else s->prefetch_bw3 = 0; - dml2_printf("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3); + DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3); if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) { s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded); - dml2_printf("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3); + DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3); } // prefetch_bw4: SW @@ -5647,17 +5581,17 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->prefetch_bw4 = 0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, *p->Tpre_rounded, (*p->Tpre_rounded - Tpre)); - dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); - dml2_printf("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, 
s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips)); - dml2_printf("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1); - dml2_printf("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2); - dml2_printf("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3); - dml2_printf("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1); - dml2_printf("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2); - dml2_printf("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3); - dml2_printf("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4); + DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, s->dst_y_prefetch_equ * s->LineTime, *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime))); + DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); + DML_LOG_VERBOSE("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips)); + DML_LOG_VERBOSE("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1); + DML_LOG_VERBOSE("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2); + DML_LOG_VERBOSE("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3); + DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1); + DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2); + DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3); + DML_LOG_VERBOSE("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4); #endif { bool Case1OK = false; @@ -5676,14 +5610,14 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch double total_row_bytes = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes); - dml2_printf("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded); - dml2_printf("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded); + DML_LOG_VERBOSE("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded); if (s->prefetch_bw1 > 0) { double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1; double row_transfer_time = total_row_bytes / s->prefetch_bw1; - dml2_printf("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time); - dml2_printf("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time); + DML_LOG_VERBOSE("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time); + DML_LOG_VERBOSE("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time); if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { Case1OK = true; } @@ -5696,8 +5630,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch if (s->prefetch_bw2 > 0) { double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2; double row_transfer_time = total_row_bytes / s->prefetch_bw2; - dml2_printf("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time); - dml2_printf("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time); + DML_LOG_VERBOSE("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time); + DML_LOG_VERBOSE("DML::%s: 
Case2: row_transfer_time = %f\n", __func__, row_transfer_time); if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time < s->Tr0_trips_rounded) { Case2OK = true; } @@ -5709,8 +5643,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch if (s->prefetch_bw3 > 0) { double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3; double row_transfer_time = total_row_bytes / s->prefetch_bw3; - dml2_printf("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time); - dml2_printf("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time); + DML_LOG_VERBOSE("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time); + DML_LOG_VERBOSE("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time); if (vm_transfer_time < s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { Case3OK = true; } @@ -5730,10 +5664,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Case1OK: %u\n", __func__, Case1OK); - dml2_printf("DML::%s: Case2OK: %u\n", __func__, Case2OK); - dml2_printf("DML::%s: Case3OK: %u\n", __func__, Case3OK); - dml2_printf("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ); + DML_LOG_VERBOSE("DML::%s: Case1OK: %u\n", __func__, Case1OK); + DML_LOG_VERBOSE("DML::%s: Case2OK: %u\n", __func__, Case2OK); + DML_LOG_VERBOSE("DML::%s: Case3OK: %u\n", __func__, Case3OK); + DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ); #endif if (s->prefetch_bw_equ > 0) { @@ -5753,12 +5687,12 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else { s->Tvm_equ = 0; s->Tr0_equ = 0; - dml2_printf("DML::%s: prefetch_bw_equ equals 0!\n", __func__); + DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ equals 0!\n", __func__); } } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ); - dml2_printf("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ); + DML_LOG_VERBOSE("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ); + DML_LOG_VERBOSE("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ); #endif // Use the more stressful prefetch schedule if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) { @@ -5769,7 +5703,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Using oto scheduling for prefetch\n", __func__); + DML_LOG_VERBOSE("DML::%s: Using oto scheduling for prefetch\n", __func__); #endif } else { @@ -5785,7 +5719,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Using equ bw scheduling for prefetch\n", __func__); + DML_LOG_VERBOSE("DML::%s: Using equ bw scheduling for prefetch\n", __func__); #endif } @@ -5797,31 +5731,31 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->prefetch_swath_time_us = (s->LinesToRequestPrefetchPixelData * 
s->LineTime); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM); - dml2_printf("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank); - dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); - dml2_printf("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch); - dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); - dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); - dml2_printf("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData); - dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); - dml2_printf("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us); + DML_LOG_VERBOSE("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM); + DML_LOG_VERBOSE("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch); + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + DML_LOG_VERBOSE("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us); - dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk); - dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line); - dml2_printf("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes); - dml2_printf("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line); + DML_LOG_VERBOSE("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes); + DML_LOG_VERBOSE("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw); #endif - DML2_ASSERT(*p->dst_y_prefetch < 64); + DML_ASSERT(*p->dst_y_prefetch < 64); unsigned int min_lsw_required = (unsigned int)math_max2(2, p->tdlut_drain_time / s->LineTime); if (s->LinesToRequestPrefetchPixelData >= min_lsw_required && s->prefetch_bw_equ > 0) { *p->VRatioPrefetchY = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData; *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, 1.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); - dml2_printf("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY); - dml2_printf("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY); + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); + DML_LOG_VERBOSE("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY); + DML_LOG_VERBOSE("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY); #endif if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) { if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) { @@ -5829,13 +5763,13 @@ static bool 
CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0)); } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); *p->VRatioPrefetchY = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); - dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); - dml2_printf("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY); + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + DML_LOG_VERBOSE("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY); #endif } @@ -5843,22 +5777,22 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, 1.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); - dml2_printf("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC); - dml2_printf("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC); + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); + DML_LOG_VERBOSE("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC); + DML_LOG_VERBOSE("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC); #endif if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) { if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) { *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0)); } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. 
LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); *p->VRatioPrefetchC = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); - dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); - dml2_printf("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC); + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); + DML_LOG_VERBOSE("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC); #endif } @@ -5866,36 +5800,34 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->RequiredPrefetchPixelDataBWChroma = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelC * p->swath_width_chroma_ub / s->LineTime; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); - dml2_printf("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); - dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); - dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); - dml2_printf("DML::%s: RequiredPrefetchPixelDataBWChroma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); + DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); + DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); #endif } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); - dml2_printf("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); *p->VRatioPrefetchY = 0; *p->VRatioPrefetchC = 0; *p->RequiredPrefetchPixelDataBWLuma = 0; *p->RequiredPrefetchPixelDataBWChroma = 0; } - dml2_printf("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM); - dml2_printf("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM); - dml2_printf("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank); - dml2_printf("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime); - dml2_printf("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime); - dml2_printf("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n"); - dml2_printf("DML: 
Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup); - dml2_printf("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow); + DML_LOG_VERBOSE("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM); + DML_LOG_VERBOSE("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM); + DML_LOG_VERBOSE("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank); + DML_LOG_VERBOSE("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime); + DML_LOG_VERBOSE("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime); + DML_LOG_VERBOSE("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n"); + DML_LOG_VERBOSE("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup); + DML_LOG_VERBOSE("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow); } else { - dml2_printf("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); - dml2_printf("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n", + DML_LOG_VERBOSE("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); + DML_LOG_VERBOSE("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n", __func__, min_Lsw_equ_ok, *p->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime); - dml2_printf("DML::%s: No time to prefetch! 
min_Lsw_equ_ok = %d, Tpre_rounded+Tvm_trips_rounded+2.0*Tr0_trips_rounded+min_Tsw_equ (%f) should be > \n", - __func__, tpre_gt_req_latency, (s->min_Lsw_equ*s->LineTime + s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded), p->Turg, s->trip_to_mem, p->ExtraLatencyPrefetch); s->NoTimeToPrefetch = true; s->TimeForFetchingVM = 0; s->TimeForFetchingRowInVBlank = 0; @@ -5916,18 +5848,18 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch prefetch_vm_bw = 0; } else if (*p->dst_y_per_vm_vblank > 0) { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); - dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); - dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); #endif prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); + DML_LOG_VERBOSE("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); #endif } else { prefetch_vm_bw = 0; s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); } if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) { @@ -5936,14 +5868,14 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); - dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); - dml2_printf("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); #endif } else { prefetch_row_bw = 0; s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: No time to prefetch!. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. 
dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); } *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw); @@ -5963,12 +5895,12 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->prefetch_vmrow_bw = 0; } - dml2_printf("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank); - dml2_printf("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank); - dml2_printf("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw); - dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); - dml2_printf("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); - dml2_printf("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch); + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); + DML_LOG_VERBOSE("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch); return s->NoTimeToPrefetch; } @@ -6005,7 +5937,7 @@ static unsigned int find_max_impact_plane(unsigned int this_plane_idx, unsigned } } if (max_idx <= 0) { - DML2_ASSERT(max_idx >= 0); + DML_ASSERT(max_idx >= 0); max_idx = this_plane_idx; } @@ -6037,12 +5969,12 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core // worst case if the rob and cdb is fully hogged s->max_Trpd_dcfclk_cycles = (unsigned int) math_ceil2((p->rob_buffer_size_kbytes*1024 + p->compressed_buffer_size_kbytes*DML_MAX_COMPRESSION_RATIO*1024)/64.0, 1.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: num_active_planes = %d\n", __func__, p->num_active_planes); - dml2_printf("DML::%s: rob_buffer_size_kbytes = %d\n", __func__, p->rob_buffer_size_kbytes); - dml2_printf("DML::%s: compressed_buffer_size_kbytes = %d\n", __func__, p->compressed_buffer_size_kbytes); - dml2_printf("DML::%s: estimated_urg_bandwidth_required_mbps = %f\n", __func__, p->estimated_urg_bandwidth_required_mbps); - dml2_printf("DML::%s: estimated_dcfclk_mhz = %f\n", __func__, p->estimated_dcfclk_mhz); - dml2_printf("DML::%s: max_Trpd_dcfclk_cycles = %u\n", __func__, s->max_Trpd_dcfclk_cycles); + DML_LOG_VERBOSE("DML::%s: num_active_planes = %d\n", __func__, p->num_active_planes); + DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %d\n", __func__, p->rob_buffer_size_kbytes); + DML_LOG_VERBOSE("DML::%s: compressed_buffer_size_kbytes = %d\n", __func__, p->compressed_buffer_size_kbytes); + DML_LOG_VERBOSE("DML::%s: estimated_urg_bandwidth_required_mbps = %f\n", __func__, p->estimated_urg_bandwidth_required_mbps); + DML_LOG_VERBOSE("DML::%s: estimated_dcfclk_mhz = %f\n", __func__, p->estimated_dcfclk_mhz); + DML_LOG_VERBOSE("DML::%s: max_Trpd_dcfclk_cycles = %u\n", __func__, s->max_Trpd_dcfclk_cycles); #endif // calculate the return impact from each plane, request is 256B per dcfclk @@ -6063,12 +5995,12 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core s->burst_bytes_to_fill_det += 
(unsigned int) (math_floor2(p->lb_source_lines_l[i] / p->swath_height_l[i], 1) * s->src_swath_bytes_l[i]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: i=%u pixel_format = %d\n", __func__, i, p->pixel_format[i]); - dml2_printf("DML::%s: i=%u chunk_bytes_l = %d\n", __func__, i, p->chunk_bytes_l); - dml2_printf("DML::%s: i=%u lb_source_lines_l = %d\n", __func__, i, p->lb_source_lines_l[i]); - dml2_printf("DML::%s: i=%u src_detile_buf_size_bytes_l=%d\n", __func__, i, s->src_detile_buf_size_bytes_l[i]); - dml2_printf("DML::%s: i=%u src_swath_bytes_l=%d\n", __func__, i, s->src_swath_bytes_l[i]); - dml2_printf("DML::%s: i=%u burst_bytes_to_fill_det=%d (luma)\n", __func__, i, s->burst_bytes_to_fill_det); + DML_LOG_VERBOSE("DML::%s: i=%u pixel_format = %d\n", __func__, i, p->pixel_format[i]); + DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_l = %d\n", __func__, i, p->chunk_bytes_l); + DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_l = %d\n", __func__, i, p->lb_source_lines_l[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_l=%d\n", __func__, i, s->src_detile_buf_size_bytes_l[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_l=%d\n", __func__, i, s->src_swath_bytes_l[i]); + DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d (luma)\n", __func__, i, s->burst_bytes_to_fill_det); #endif if (s->src_swath_bytes_c[i] > 0) { // dual_plane @@ -6079,10 +6011,10 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: i=%u chunk_bytes_c = %d\n", __func__, i, p->chunk_bytes_c); - dml2_printf("DML::%s: i=%u lb_source_lines_c = %d\n", __func__, i, p->lb_source_lines_c[i]); - dml2_printf("DML::%s: i=%u src_detile_buf_size_bytes_c=%d\n", __func__, i, s->src_detile_buf_size_bytes_c[i]); - dml2_printf("DML::%s: i=%u src_swath_bytes_c=%d\n", __func__, i, s->src_swath_bytes_c[i]); + DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_c = %d\n", __func__, i, p->chunk_bytes_c); + DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_c = %d\n", __func__, i, p->lb_source_lines_c[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_c=%d\n", __func__, i, s->src_detile_buf_size_bytes_c[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_c=%d\n", __func__, i, s->src_swath_bytes_c[i]); #endif } @@ -6090,9 +6022,9 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core s->accumulated_return_path_dcfclk_cycles[i] = (unsigned int) math_ceil2(((DML_MAX_COMPRESSION_RATIO-1) * 64 * p->estimated_dcfclk_mhz) * s->time_to_fill_det_us / 64.0, 1.0); //for 64B per DCFClk #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: i=%u burst_bytes_to_fill_det=%d\n", __func__, i, s->burst_bytes_to_fill_det); - dml2_printf("DML::%s: i=%u time_to_fill_det_us=%f\n", __func__, i, s->time_to_fill_det_us); - dml2_printf("DML::%s: i=%u accumulated_return_path_dcfclk_cycles=%u\n", __func__, i, s->accumulated_return_path_dcfclk_cycles[i]); + DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d\n", __func__, i, s->burst_bytes_to_fill_det); + DML_LOG_VERBOSE("DML::%s: i=%u time_to_fill_det_us=%f\n", __func__, i, s->time_to_fill_det_us); + DML_LOG_VERBOSE("DML::%s: i=%u accumulated_return_path_dcfclk_cycles=%u\n", __func__, i, s->accumulated_return_path_dcfclk_cycles[i]); #endif // clamping to worst case delay which is one which occupy the full rob+cdb if (s->accumulated_return_path_dcfclk_cycles[i] > s->max_Trpd_dcfclk_cycles) @@ -6109,7 +6041,7 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core 
p->impacted_dst_y_pre[i] = math_ceil2(p->impacted_dst_y_pre[i] / p->line_time[i], 0.25); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: i=%u impacted_Tpre=%f (k=%u)\n", __func__, i, p->impacted_dst_y_pre[i], k); + DML_LOG_VERBOSE("DML::%s: i=%u impacted_Tpre=%f (k=%u)\n", __func__, i, p->impacted_dst_y_pre[i], k); #endif } @@ -6120,8 +6052,8 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core *p->recalc_prefetch_schedule = 1; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: i=%u Tpre_rounded=%f\n", __func__, i, p->Tpre_rounded[i]); - dml2_printf("DML::%s: i=%u Tpre_oto=%f\n", __func__, i, p->Tpre_oto[i]); + DML_LOG_VERBOSE("DML::%s: i=%u Tpre_rounded=%f\n", __func__, i, p->Tpre_rounded[i]); + DML_LOG_VERBOSE("DML::%s: i=%u Tpre_oto=%f\n", __func__, i, p->Tpre_oto[i]); #endif } } else { @@ -6131,8 +6063,8 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: prefetch_global_check_passed=%u\n", __func__, s->prefetch_global_check_passed); - dml2_printf("DML::%s: recalc_prefetch_schedule=%u\n", __func__, *p->recalc_prefetch_schedule); + DML_LOG_VERBOSE("DML::%s: prefetch_global_check_passed=%u\n", __func__, s->prefetch_global_check_passed); + DML_LOG_VERBOSE("DML::%s: recalc_prefetch_schedule=%u\n", __func__, *p->recalc_prefetch_schedule); #endif return s->prefetch_global_check_passed; @@ -6150,8 +6082,8 @@ static void calculate_peak_bandwidth_required( memset(l, 0, sizeof(struct dml2_core_shared_calculate_peak_bandwidth_required_locals)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: inc_flip_bw = %d\n", __func__, p->inc_flip_bw); - dml2_printf("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, p->num_active_planes); + DML_LOG_VERBOSE("DML::%s: inc_flip_bw = %d\n", __func__, p->inc_flip_bw); + DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, p->num_active_planes); #endif for (unsigned int k = 0; k < p->num_active_planes; ++k) { @@ -6347,12 +6279,12 @@ static void calculate_peak_bandwidth_required( p->surface_peak_required_bw[m][n]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_vactive_bandwidth_required[m][n]); - dml2_printf("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]); - dml2_printf("DML::%s: urg_bandwidth_required_qual[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]); - dml2_printf("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->non_urg_bandwidth_required[m][n]); + DML_LOG_VERBOSE("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_vactive_bandwidth_required[m][n]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? 
"_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_qual[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]); + DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->non_urg_bandwidth_required[m][n]); #endif - DML2_ASSERT(p->urg_bandwidth_required[m][n] >= p->non_urg_bandwidth_required[m][n]); + DML_ASSERT(p->urg_bandwidth_required[m][n] >= p->non_urg_bandwidth_required[m][n]); } } } @@ -6414,18 +6346,18 @@ static void check_urgent_bandwidth_support( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, frac_urg_bandwidth_nom_sdp); - dml2_printf("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, frac_urg_bandwidth_nom_dram); - dml2_printf("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, frac_urg_bandwidth_nom_sdp); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, frac_urg_bandwidth_nom_dram); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom); - dml2_printf("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, frac_urg_bandwidth_mall_sdp); - dml2_printf("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, frac_urg_bandwidth_mall_dram); - dml2_printf("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall); - dml2_printf("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, frac_urg_bandwidth_mall_sdp); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, frac_urg_bandwidth_mall_dram); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall); + DML_LOG_VERBOSE("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok); for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) { for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { - dml2_printf("DML::%s: state:%s bw_type:%s urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", + DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required[m][n]) ? "<" : ">=", urg_bandwidth_required[m][n]); } @@ -6446,14 +6378,14 @@ static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal flip_bw_available_mbps = flip_bw_available_sdp_mbps < flip_bw_available_dram_mbps ? 
flip_bw_available_sdp_mbps : flip_bw_available_dram_mbps; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); - dml2_printf("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]); - dml2_printf("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]); - dml2_printf("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]); - dml2_printf("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]); - dml2_printf("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, flip_bw_available_sdp_mbps); - dml2_printf("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, flip_bw_available_dram_mbps); - dml2_printf("DML::%s: flip_bw_available_mbps = %f\n", __func__, flip_bw_available_mbps); + DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]); + DML_LOG_VERBOSE("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, flip_bw_available_sdp_mbps); + DML_LOG_VERBOSE("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, flip_bw_available_dram_mbps); + DML_LOG_VERBOSE("DML::%s: flip_bw_available_mbps = %f\n", __func__, flip_bw_available_mbps); #endif return flip_bw_available_mbps; @@ -6478,28 +6410,28 @@ static void calculate_immediate_flip_bandwidth_support( *flip_bandwidth_support_ok &= urg_bandwidth_available[eval_state][n] >= urg_bandwidth_required_flip[eval_state][n]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str(n)); - dml2_printf("DML::%s: urg_bandwidth_available = %f\n", __func__, urg_bandwidth_available[eval_state][n]); - dml2_printf("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_bandwidth_required_flip[eval_state][n]); - dml2_printf("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_bandwidth_required_flip[eval_state][n]); - dml2_printf("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); + DML_LOG_VERBOSE("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str(n)); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available = %f\n", __func__, urg_bandwidth_available[eval_state][n]); + DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_bandwidth_required_flip[eval_state][n]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_bandwidth_required_flip[eval_state][n]); + DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); #endif - DML2_ASSERT(urg_bandwidth_required_flip[eval_state][n] >= non_urg_bandwidth_required_flip[eval_state][n]); + DML_ASSERT(urg_bandwidth_required_flip[eval_state][n] >= 
non_urg_bandwidth_required_flip[eval_state][n]); } *frac_urg_bandwidth_flip = (frac_urg_bw_flip_sdp > frac_urg_bw_flip_dram) ? frac_urg_bw_flip_sdp : frac_urg_bw_flip_dram; *flip_bandwidth_support_ok &= (*frac_urg_bandwidth_flip <= 1.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); - dml2_printf("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_urg_bw_flip_sdp); - dml2_printf("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_urg_bw_flip_dram); - dml2_printf("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip); - dml2_printf("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); + DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); + DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_urg_bw_flip_sdp); + DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_urg_bw_flip_dram); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip); + DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) { for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { - dml2_printf("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", + DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required_flip[m][n]) ? "<" : ">=", urg_bandwidth_required_flip[m][n]); } @@ -6549,27 +6481,27 @@ static void CalculateFlipSchedule( l->dpte_row_bytes = DPTEBytesPerRow; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); - dml2_printf("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us); - dml2_printf("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines); - dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); - dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes); - dml2_printf("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw); - dml2_printf("DML::%s: iflip_enable = %u\n", __func__, iflip_enable); - dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); - dml2_printf("DML::%s: LineTime = %f\n", __func__, LineTime); - dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip); - dml2_printf("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip); - dml2_printf("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip); - dml2_printf("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, Tvm_trips_flip_rounded); - dml2_printf("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded); - dml2_printf("DML::%s: vm_bytes = %f\n", __func__, vm_bytes); - dml2_printf("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow); - dml2_printf("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes); - dml2_printf("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes); - dml2_printf("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height); - dml2_printf("DML::%s: meta_row_height = %d\n", __func__, meta_row_height); - 
dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio); + DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); + DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us); + DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines); + DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); + DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes); + DML_LOG_VERBOSE("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw); + DML_LOG_VERBOSE("DML::%s: iflip_enable = %u\n", __func__, iflip_enable); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime); + DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, Tvm_trips_flip_rounded); + DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded); + DML_LOG_VERBOSE("DML::%s: vm_bytes = %f\n", __func__, vm_bytes); + DML_LOG_VERBOSE("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow); + DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes); + DML_LOG_VERBOSE("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes); + DML_LOG_VERBOSE("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height); + DML_LOG_VERBOSE("DML::%s: meta_row_height = %d\n", __func__, meta_row_height); + DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); #endif if (TotImmediateFlipBytes > 0 && (GPUVMEnable || dcc_mrq_enable)) { @@ -6596,9 +6528,9 @@ static void CalculateFlipSchedule( l->min_row_time = l->min_row_height * LineTime / VRatio; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: min_row_time = %f\n", __func__, l->min_row_time); + DML_LOG_VERBOSE("DML::%s: min_row_time = %f\n", __func__, l->min_row_time); #endif - DML2_ASSERT(l->min_row_time > 0); + DML_ASSERT(l->min_row_time > 0); if (use_lb_flip_bw) { // For mode check, calculation the flip bw requirement with worst case flip time @@ -6619,20 +6551,20 @@ static void CalculateFlipSchedule( l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded), l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time); - dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes); - dml2_printf("DML::%s: total row bytes (%d row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes); - dml2_printf("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes); - dml2_printf("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip)); - dml2_printf("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded)); - dml2_printf("DML::%s: lb_flip_bw for row = %f\n", __func__, l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); + DML_LOG_VERBOSE("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time); + DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff 
scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes); + DML_LOG_VERBOSE("DML::%s: total row bytes (%f row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes); + DML_LOG_VERBOSE("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip)); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded)); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row = %f\n", __func__, l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); if (l->lb_flip_bw > 0) { - dml2_printf("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw); - dml2_printf("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows); - dml2_printf("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime); - dml2_printf("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows); - dml2_printf("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded)); + DML_LOG_VERBOSE("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw); + DML_LOG_VERBOSE("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows); + DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime); + DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded)); } #endif l->lb_flip_bw = math_max3(l->lb_flip_bw, @@ -6640,8 +6572,8 @@ static void CalculateFlipSchedule( (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip); - dml2_printf("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); #endif } @@ -6653,13 +6585,12 @@ static void CalculateFlipSchedule( } else { if (iflip_enable) { l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i) - double portion = (double)per_pipe_flip_bytes / (double)TotImmediateFlipBytes; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes); - dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, 
BandwidthAvailableForImmediateFlip); - dml2_printf("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW); - dml2_printf("DML::%s: portion of flip bw = %f\n", __func__, portion); + DML_LOG_VERBOSE("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes); + DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); + DML_LOG_VERBOSE("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW); + DML_LOG_VERBOSE("DML::%s: portion of flip bw = %f\n", __func__, (double)per_pipe_flip_bytes / (double)TotImmediateFlipBytes); #endif if (l->ImmediateFlipBW == 0) { l->Tvm_flip = 0; @@ -6674,11 +6605,11 @@ static void CalculateFlipSchedule( LineTime / 4.0); } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes * HostVMInefficiencyFactor); - dml2_printf("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes)); + DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes * HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes)); - dml2_printf("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip); - dml2_printf("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, Tr0_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, Tr0_trips_flip); #endif *dst_y_per_vm_flip = math_ceil2(4.0 * (l->Tvm_flip / LineTime), 1.0) / 4.0; *dst_y_per_row_flip = math_ceil2(4.0 * (l->Tr0_flip / LineTime), 1.0) / 4.0; @@ -6711,14 +6642,14 @@ static void CalculateFlipSchedule( #ifdef __DML_VBA_DEBUG__ if (!use_lb_flip_bw) { - dml2_printf("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip); - dml2_printf("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip); - dml2_printf("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip); - dml2_printf("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip); - dml2_printf("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time); + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time); } - dml2_printf("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw); - dml2_printf("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, 
*ImmediateFlipSupportedForPipe); + DML_LOG_VERBOSE("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw); + DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe); #endif } @@ -6736,7 +6667,7 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); + DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); #endif p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency; @@ -6755,20 +6686,20 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->Watermark->temp_read_or_ppt_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency); - dml2_printf("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency); - dml2_printf("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency); - dml2_printf("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time); - dml2_printf("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime); - dml2_printf("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); - dml2_printf("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark); - dml2_printf("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark); - dml2_printf("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark); - dml2_printf("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark); - dml2_printf("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark); - dml2_printf("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark); - dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark); - dml2_printf("DML::%s: temp_read_or_ppt_watermark_us = %f\n", __func__, p->Watermark->temp_read_or_ppt_watermark_us); + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency); + DML_LOG_VERBOSE("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency); + DML_LOG_VERBOSE("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency); + DML_LOG_VERBOSE("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time); + DML_LOG_VERBOSE("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime); + DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); + DML_LOG_VERBOSE("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark); + DML_LOG_VERBOSE("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark); + DML_LOG_VERBOSE("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark); + DML_LOG_VERBOSE("DML::%s: StutterExitWatermark = %f\n", __func__, 
p->Watermark->StutterExitWatermark); + DML_LOG_VERBOSE("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark); + DML_LOG_VERBOSE("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark); + DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark); + DML_LOG_VERBOSE("DML::%s: temp_read_or_ppt_watermark_us = %f\n", __func__, p->Watermark->temp_read_or_ppt_watermark_us); #endif s->TotalActiveWriteback = 0; @@ -6801,11 +6732,11 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark); - dml2_printf("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark); - dml2_printf("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark); - dml2_printf("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired); - dml2_printf("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency); + DML_LOG_VERBOSE("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark); + DML_LOG_VERBOSE("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark); + DML_LOG_VERBOSE("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark); + DML_LOG_VERBOSE("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired); + DML_LOG_VERBOSE("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency); #endif s->TotalPixelBW = 0.0; @@ -6836,11 +6767,11 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines); - dml2_printf("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize); - dml2_printf("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, LBBitPerPixel); - dml2_printf("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio); - dml2_printf("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps); + DML_LOG_VERBOSE("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines); + DML_LOG_VERBOSE("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize); + DML_LOG_VERBOSE("DML::%s: k=%u, LBBitPerPixel = %f\n", __func__, k, LBBitPerPixel); + DML_LOG_VERBOSE("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps); #endif s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / v_ratio * (h_total / pixel_clock_mhz); @@ -6943,16 +6874,16 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height_l[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); - dml2_printf("DML::%s: k=%u, 
BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); - dml2_printf("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); - dml2_printf("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); - dml2_printf("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]); - dml2_printf("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate); - dml2_printf("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l); - dml2_printf("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l); - dml2_printf("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, k, p->meta_row_height_l[k]); - dml2_printf("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l); + DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate); + DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l); + DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, k, p->meta_row_height_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l); #endif p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l; @@ -6967,10 +6898,10 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]); - dml2_printf("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c); - dml2_printf("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c); - dml2_printf("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c); + DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c); + DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c); #endif } } @@ -6992,10 +6923,10 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported); - dml2_printf("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported); - dml2_printf("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported); - dml2_printf("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport); + DML_LOG_VERBOSE("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported); + DML_LOG_VERBOSE("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported); + DML_LOG_VERBOSE("DML::%s: 
MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported); + DML_LOG_VERBOSE("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport); #endif } @@ -7141,7 +7072,7 @@ static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struc unsigned int index = 0; for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { - dml2_printf("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %d\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); + DML_LOG_VERBOSE("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %ld\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); if (i == 0) index = 0; @@ -7153,32 +7084,30 @@ static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struc break; } } -#if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: uclk_freq_khz = %d\n", __func__, uclk_freq_khz); - dml2_printf("DML::%s: index = %d\n", __func__, index); -#endif + DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, index); return index; } static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table) { unsigned int i; - bool clk_entry_found = 0; + bool clk_entry_found = false; for (i = 0; i < clk_table->uclk.num_clk_values; i++) { - dml2_printf("DML::%s: clk_table.uclk.clk_values_khz[%d] = %d\n", __func__, i, clk_table->uclk.clk_values_khz[i]); + DML_LOG_VERBOSE("DML::%s: clk_table.uclk.clk_values_khz[%d] = %ld\n", __func__, i, clk_table->uclk.clk_values_khz[i]); if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) { - clk_entry_found = 1; + clk_entry_found = true; break; } } if (!clk_entry_found) - DML2_ASSERT(clk_entry_found); + DML_ASSERT(clk_entry_found); #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); - dml2_printf("DML::%s: index = %d\n", __func__, i); + DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, i); #endif return i; } @@ -7218,10 +7147,10 @@ static void calculate_hostvm_inefficiency_factor( if ((*HostVMInefficiencyFactorPrefetch < 4) && (remote_iommu_outstanding_translations < max_outstanding_reqs)) *HostVMInefficiencyFactorPrefetch = 4; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, urg_bandwidth_avail_active_pixel_and_vm); - dml2_printf("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only); - dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, *HostVMInefficiencyFactor); - dml2_printf("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, urg_bandwidth_avail_active_pixel_and_vm); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, *HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch); #endif } } @@ -7335,30 +7264,659 @@ static void calculate_pstate_keepout_dst_lines( } } +static noinline_for_stack void dml_core_ms_prefetch_check(struct dml2_core_internal_display_mode_lib *mode_lib, + const struct dml2_display_cfg *display_cfg) +{ + struct 
dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals; + struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; + struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; + struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params; +#ifdef DML_GLOBAL_PREFETCH_CHECK + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params; +#endif + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; + + double min_return_bw_for_latency; + unsigned int k; + + mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep; + + calculate_hostvm_inefficiency_factor( + &s->HostVMInefficiencyFactor, + &s->HostVMInefficiencyFactorPrefetch, + + display_cfg->gpuvm_enable, + display_cfg->hostvm_enable, + mode_lib->ip.remote_iommu_outstanding_translations, + mode_lib->soc.max_outstanding_reqs, + mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active], + mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]); + + mode_lib->ms.Total3dlutActive = 0; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) + mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1; + + // Calculate tdlut schedule related terms + calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK; + calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; + calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode; + calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode; + calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size; + calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; + calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag; + calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling); + + // output + calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k]; + calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k]; + calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k]; + calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k]; + calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k]; + calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k]; + calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k]; + + calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); + } + + min_return_bw_for_latency = 
mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; + + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); + + CalculateExtraLatency( + display_cfg, + mode_lib->ip.rob_buffer_size_kbytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, + s->ReorderingBytes, + mode_lib->ms.DCFCLK, + mode_lib->ms.FabricClock, + mode_lib->ip.pixel_chunk_size_kbytes, + min_return_bw_for_latency, + mode_lib->ms.num_active_planes, + mode_lib->ms.NoOfDPP, + mode_lib->ms.dpte_group_bytes, + s->tdlut_bytes_per_group, + s->HostVMInefficiencyFactor, + s->HostVMInefficiencyFactorPrefetch, + mode_lib->soc.hostvm_min_page_size_kbytes, + mode_lib->soc.qos_parameters.qos_type, + !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable), + mode_lib->soc.max_outstanding_reqs, + mode_lib->ms.support.request_size_bytes_luma, + mode_lib->ms.support.request_size_bytes_chroma, + mode_lib->ip.meta_chunk_size_kbytes, + mode_lib->ip.dchub_arb_to_ret_delay, + mode_lib->ms.TripToMemory, + mode_lib->ip.hostvm_mode, + + // output + &mode_lib->ms.ExtraLatency, + &mode_lib->ms.ExtraLatency_sr, + &mode_lib->ms.ExtraLatencyPrefetch); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) + s->impacted_dst_y_pre[k] = 0; + + s->recalc_prefetch_schedule = 0; + s->recalc_prefetch_done = 0; + do { + mode_lib->ms.support.PrefetchSupported = true; + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format; + + s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, + mode_lib->ms.NoOfDPP[k], + display_cfg->plane_descriptors[k].composition.viewport.plane0.width, + display_cfg->plane_descriptors[k].composition.viewport.plane0.height, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, + display_cfg->plane_descriptors[k].composition.rotation_angle); + + s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, + mode_lib->ms.NoOfDPP[k], + display_cfg->plane_descriptors[k].composition.viewport.plane1.width, + display_cfg->plane_descriptors[k].composition.viewport.plane1.height, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, + display_cfg->plane_descriptors[k].composition.rotation_angle); + + struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe; + + mode_lib->ms.TWait[k] = CalculateTWait( + display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, + mode_lib->ms.UrgLatency, + mode_lib->ms.TripToMemory, + !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? 
+ get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx) : 0.0); + + myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k]; + myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK; + myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; + myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k]; + myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled; + myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; + myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; + myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; + myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored; + myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; + myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; + myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; + myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; + myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; + myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors; + myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active; + myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; + myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active; + myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; + myPipe->ODMMode = mode_lib->ms.ODMMode[k]; + myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; + myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; + myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; + myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]); +#endif + CalculatePrefetchSchedule_params->display_cfg = display_cfg; + CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch; + CalculatePrefetchSchedule_params->myPipe = myPipe; + CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k]; + CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter; + CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl; + CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only; + CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor; + CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal; + CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned 
int)(mode_lib->ms.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; + CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; + CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k]; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; + CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; + CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required; + CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes; + CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency; + CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch; + CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc; + CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k]; + CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k]; + CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k]; + CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k]; + CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k]; + CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k]; + CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub[k]; + CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k]; + CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k]; + CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k]; + CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k]; + CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory; + CalculatePrefetchSchedule_params->Turg = mode_lib->ms.UrgLatency; + CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; + CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k]; + CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k]; + CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k]; + CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k]; + CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0); + CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k]; + CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k]; + CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; + CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present; + CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k]; + 
CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k]; + CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k]; + CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->ms.vactive_sw_bw_l[k]; + CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->ms.vactive_sw_bw_c[k]; + + // output + CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k]; + CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k]; + CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k]; + CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k]; + CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l + CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c + CalculatePrefetchSchedule_params->RequiredPrefetchBWOTO = &mode_lib->ms.RequiredPrefetchBWOTO[k]; + CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k]; + CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; + CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k]; + CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k]; + CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0]; + CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1]; + CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2]; + CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k]; + CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k]; + CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k]; + CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k]; + CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k]; + CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k]; + CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0]; + CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1]; + CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2]; + CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k]; + CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k]; + CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k]; + CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k]; + CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k]; + + mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); + + mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k]; + DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank); + } // for k num_planes + + CalculateDCFCLKDeepSleepTdlut( + 
display_cfg, + mode_lib->ms.num_active_planes, + mode_lib->ms.BytePerPixelY, + mode_lib->ms.BytePerPixelC, + mode_lib->ms.SwathWidthY, + mode_lib->ms.SwathWidthC, + mode_lib->ms.NoOfDPP, + mode_lib->ms.PSCL_FACTOR, + mode_lib->ms.PSCL_FACTOR_CHROMA, + mode_lib->ms.RequiredDPPCLK, + mode_lib->ms.vactive_sw_bw_l, + mode_lib->ms.vactive_sw_bw_c, + mode_lib->soc.return_bus_width_bytes, + mode_lib->ms.RequiredDISPCLK, + s->tdlut_bytes_to_deliver, + s->prefetch_swath_time_us, + + /* Output */ + &mode_lib->ms.dcfclk_deepsleep); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (mode_lib->ms.dst_y_prefetch[k] < 2.0 + || mode_lib->ms.LinesForVM[k] >= 32.0 + || mode_lib->ms.LinesForDPTERow[k] >= 16.0 + || mode_lib->ms.NoTimeForPrefetch[k] == true + || s->DSTYAfterScaler[k] > 8) { + mode_lib->ms.support.PrefetchSupported = false; + DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); + } + } + + mode_lib->ms.support.DynamicMetadataSupported = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) { + mode_lib->ms.support.DynamicMetadataSupported = false; + } + } + + mode_lib->ms.support.VRatioInPrefetchSupported = true; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || + mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { + mode_lib->ms.support.VRatioInPrefetchSupported = false; + DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__); + DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__); + DML_LOG_VERBOSE("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported); + } + } + + mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.VRatioInPrefetchSupported; + + // By default, do not recalc prefetch schedule + s->recalc_prefetch_schedule = 0; + + // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok + if (mode_lib->ms.support.PrefetchSupported) { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + // Calculate Urgent burst factor for prefetch +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]); +#endif + CalculateUrgentBurstFactor( + &display_cfg->plane_descriptors[k], + mode_lib->ms.swath_width_luma_ub[k], + mode_lib->ms.swath_width_chroma_ub[k], + mode_lib->ms.SwathHeightY[k], + mode_lib->ms.SwathHeightC[k], + s->line_times[k], + mode_lib->ms.UrgLatency, + mode_lib->ms.VRatioPreY[k], + mode_lib->ms.VRatioPreC[k], + mode_lib->ms.BytePerPixelInDETY[k], + 
mode_lib->ms.BytePerPixelInDETC[k], + mode_lib->ms.DETBufferSizeY[k], + mode_lib->ms.DETBufferSizeC[k], + /* Output */ + &mode_lib->ms.UrgentBurstFactorLumaPre[k], + &mode_lib->ms.UrgentBurstFactorChromaPre[k], + &mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); + } + + // Calculate urgent bandwidth required, both urg and non urg peak bandwidth + // assume flip bw is 0 at this point + for (k = 0; k < mode_lib->ms.num_active_planes; k++) + mode_lib->ms.final_flip_bw[k] = 0; + + calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->ms.support.urg_vactive_bandwidth_required; + calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required; + calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->ms.support.urg_bandwidth_required_qual; + calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required; + calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = mode_lib->ms.surface_avg_vactive_required_bw; + calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw; + + calculate_peak_bandwidth_params->display_cfg = display_cfg; + calculate_peak_bandwidth_params->inc_flip_bw = 0; + calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes; + calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; + calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; + calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; + + calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; + calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; + calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; + calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; + calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; + calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; + calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; + calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; + calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; + calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; + calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; + calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; + 
calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + calculate_peak_bandwidth_params); + + // Check urg peak bandwidth against available urg bw + // check at SDP and DRAM, for all soc states (SVP prefetch an Sys Active) + check_urgent_bandwidth_support( + &s->dummy_single[0], // double* frac_urg_bandwidth + &s->dummy_single[1], // double* frac_urg_bandwidth_mall + &mode_lib->ms.support.UrgVactiveBandwidthSupport, + &mode_lib->ms.support.PrefetchBandwidthSupported, + + mode_lib->soc.mall_allocated_for_dcn_mbytes, + mode_lib->ms.support.non_urg_bandwidth_required, + mode_lib->ms.support.urg_vactive_bandwidth_required, + mode_lib->ms.support.urg_bandwidth_required, + mode_lib->ms.support.urg_bandwidth_available); + + mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported; + DML_LOG_VERBOSE("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) { + mode_lib->ms.support.PrefetchSupported = false; + DML_LOG_VERBOSE("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); + } + } + +#ifdef DML_GLOBAL_PREFETCH_CHECK + if (mode_lib->ms.support.PrefetchSupported && mode_lib->ms.num_active_planes > 1 && s->recalc_prefetch_done == 0) { + CheckGlobalPrefetchAdmissibility_params->num_active_planes = mode_lib->ms.num_active_planes; + CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format; + CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024; + CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024; + CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l; + CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c; + CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->ms.SwathHeightY; + CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->ms.SwathHeightC; + CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->ms.CompressedBufferSizeInkByte; + CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->ms.DETBufferSizeY; + CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->ms.DETBufferSizeC; + CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l; + CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c; + CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes; + CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = s->Tpre_rounded; + CheckGlobalPrefetchAdmissibility_params->Tpre_oto = s->Tpre_oto; + CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 
mode_lib->ms.support.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times; + CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->ms.dst_y_prefetch; + if (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps < 10 * 1024) + CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 10 * 1024; + + CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps / (double) mode_lib->soc.return_bus_width_bytes) / + ((double)mode_lib->soc.qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0); + + // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible + CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->recalc_prefetch_schedule; + CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre; + mode_lib->ms.support.PrefetchSupported = CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); + s->recalc_prefetch_done = 1; + s->recalc_prefetch_schedule = 1; + } +#endif + } // prefetch schedule ok, do urg bw and flip schedule + } while (s->recalc_prefetch_schedule); + + // Flip Schedule + // Both prefetch schedule and BW okay + if (mode_lib->ms.support.PrefetchSupported == true) { + mode_lib->ms.BandwidthAvailableForImmediateFlip = + get_bandwidth_available_for_immediate_flip( + dml2_core_internal_soc_state_sys_active, + mode_lib->ms.support.urg_bandwidth_required_qual, // no flip + mode_lib->ms.support.urg_bandwidth_available); + + mode_lib->ms.TotImmediateFlipBytes = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (display_cfg->plane_descriptors[k].immediate_flip) { + s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes( + s->HostVMInefficiencyFactor, + mode_lib->ms.vm_bytes[k], + mode_lib->ms.DPTEBytesPerRow[k], + mode_lib->ms.meta_row_bytes[k]); + } else { + s->per_pipe_flip_bytes[k] = 0; + } + mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k]; + + } + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + CalculateFlipSchedule( + &mode_lib->scratch, + display_cfg->plane_descriptors[k].immediate_flip, + 1, // use_lb_flip_bw + s->HostVMInefficiencyFactor, + s->Tvm_trips_flip[k], + s->Tr0_trips_flip[k], + s->Tvm_trips_flip_rounded[k], + s->Tr0_trips_flip_rounded[k], + display_cfg->gpuvm_enable, + mode_lib->ms.vm_bytes[k], + mode_lib->ms.DPTEBytesPerRow[k], + mode_lib->ms.BandwidthAvailableForImmediateFlip, + mode_lib->ms.TotImmediateFlipBytes, + display_cfg->plane_descriptors[k].pixel_format, + (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->ms.Tno_bw_flip[k], + mode_lib->ms.dpte_row_height[k], + mode_lib->ms.dpte_row_height_chroma[k], + mode_lib->ms.use_one_row_for_frame_flip[k], + mode_lib->ip.max_flip_time_us, + mode_lib->ip.max_flip_time_lines, + s->per_pipe_flip_bytes[k], + mode_lib->ms.meta_row_bytes[k], + s->meta_row_height_luma[k], + s->meta_row_height_chroma[k], + 
mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable, + + /* Output */ + &mode_lib->ms.dst_y_per_vm_flip[k], + &mode_lib->ms.dst_y_per_row_flip[k], + &mode_lib->ms.final_flip_bw[k], + &mode_lib->ms.ImmediateFlipSupportedForPipe[k]); + } + + calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw; + calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip; + calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw; + calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip; + calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw; + calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw; + + calculate_peak_bandwidth_params->display_cfg = display_cfg; + calculate_peak_bandwidth_params->inc_flip_bw = 1; + calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes; + calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; + calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; + calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; + + calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; + calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; + calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; + calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; + calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; + calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; + calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; + calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; + calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; + calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; + calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; + calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; + calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; + 
calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + calculate_peak_bandwidth_params); + + calculate_immediate_flip_bandwidth_support( + &s->dummy_single[0], // double* frac_urg_bandwidth_flip + &mode_lib->ms.support.ImmediateFlipSupport, + + dml2_core_internal_soc_state_sys_active, + mode_lib->ms.support.urg_bandwidth_required_flip, + mode_lib->ms.support.non_urg_bandwidth_required_flip, + mode_lib->ms.support.urg_bandwidth_available); + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false) + mode_lib->ms.support.ImmediateFlipSupport = false; + } + + } else { // if prefetch not support, assume iflip is not supported too + mode_lib->ms.support.ImmediateFlipSupport = false; + } + + s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency; + s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency; + s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr; + s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us; + s->mSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; + s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us; + s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; + s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us; + s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; + s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us; + s->mSOCParameters.USRRetrainingLatency = 0; + s->mSOCParameters.SMNLatency = 0; + s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx); + s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, mode_lib->ms.state_idx); + s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; + s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; + + CalculateWatermarks_params->display_cfg = display_cfg; + CalculateWatermarks_params->USRRetrainingRequired = false; + CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines; + CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits; + CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes; + CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK; + CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; + CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change; + CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; + CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters; + CalculateWatermarks_params->WritebackChunkSize = 
mode_lib->ip.writeback_chunk_size_kbytes; + CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK; + CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; + CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY; + CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; + CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY; + CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC; + CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY; + CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC; + CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP; + CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY; + CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC; + CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler; + CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler; + CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled; + CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte; + CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma; + CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma; + + // Output + CalculateWatermarks_params->Watermark = &mode_lib->ms.support.watermarks; // Watermarks *Watermark + CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport; + CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported; + CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[] + CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[] + CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport; + CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported; + CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported + CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport; + CalculateWatermarks_params->g6_temp_read_support = &mode_lib->ms.support.g6_temp_read_support; + CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin; + CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs; + + CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); + + calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]); + DML_LOG_VERBOSE("DML::%s: Done prefetch calculation\n", __func__); + +} + static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params) { struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib; const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg; const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table; -#if defined(__DML_VBA_DEBUG__) - double old_ReadBandwidthLuma; - double old_ReadBandwidthChroma; -#endif double outstanding_latency_us = 0; - double min_return_bw_for_latency; struct dml2_core_calcs_mode_support_locals *s = 
&mode_lib->scratch.dml_core_mode_support_locals; - struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; - struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; -#ifdef DML_GLOBAL_PREFETCH_CHECK - struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params; -#endif - struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params; - struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params; struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params; unsigned int k, m, n; @@ -7374,9 +7932,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.FabricClock = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz / 1000); mode_lib->ms.MaxDCFCLK = (double)min_clk_table->max_clocks_khz.dcfclk / 1000; mode_lib->ms.MaxFabricClock = (double)min_clk_table->max_clocks_khz.fclk / 1000; - mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dispclk / 1000; + mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dispclk / 1000; mode_lib->ms.max_dscclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dscclk / 1000; - mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dppclk / 1000; + mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dppclk / 1000; mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config); mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000); mode_lib->ms.max_dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[min_clk_table->dram_bw_table.num_entries - 1].pre_derate_dram_bw_kbps / 1000); @@ -7384,25 +7942,25 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table); #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: --- START --- \n", __func__); - dml2_printf("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes); - dml2_printf("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); - dml2_printf("DML::%s: qos_param_index = %0d\n", __func__, mode_lib->ms.qos_param_index); - dml2_printf("DML::%s: SOCCLK = 
%f\n", __func__, mode_lib->ms.SOCCLK); - dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->ms.dram_bw_mbps); - dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); - dml2_printf("DML::%s: DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); - dml2_printf("DML::%s: FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); - dml2_printf("DML::%s: MaxDCFCLK = %f\n", __func__, mode_lib->ms.MaxDCFCLK); - dml2_printf("DML::%s: max_dispclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dispclk_freq_mhz); - dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); - dml2_printf("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz); - dml2_printf("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock); - dml2_printf("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes); - dml2_printf("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present); + DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__); + DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes); + DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); + DML_LOG_VERBOSE("DML::%s: qos_param_index = %0d\n", __func__, mode_lib->ms.qos_param_index); + DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK); + DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->ms.dram_bw_mbps); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); + DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); + DML_LOG_VERBOSE("DML::%s: MaxDCFCLK = %f\n", __func__, mode_lib->ms.MaxDCFCLK); + DML_LOG_VERBOSE("DML::%s: max_dispclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dispclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock); + DML_LOG_VERBOSE("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes); + DML_LOG_VERBOSE("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present); for (k = 0; k < mode_lib->ms.num_active_planes; k++) - dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); + DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); #endif CalculateMaxDETAndMinCompressedBufferSize( @@ -7504,12 +8062,10 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); #ifdef __DML_VBA_DEBUG__ - old_ReadBandwidthLuma = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / 
(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; - old_ReadBandwidthChroma = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0; - dml2_printf("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, old_ReadBandwidthLuma); - dml2_printf("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, old_ReadBandwidthChroma); - dml2_printf("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_l[k]); - dml2_printf("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0); + DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_c[k]); #endif } @@ -7629,13 +8185,13 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.MaximumSwathWidthLuma[k] = math_min2(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma); mode_lib->ms.MaximumSwathWidthChroma[k] = math_min2(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma); - dml2_printf("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthLuma[k]); - dml2_printf("DML::%s: k=%u MaximumSwathWidthSupportLuma=%u\n", __func__, k, s->MaximumSwathWidthSupportLuma); - dml2_printf("DML::%s: k=%u MaximumSwathWidthInLineBufferLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferLuma); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportLuma=%u\n", __func__, k, s->MaximumSwathWidthSupportLuma); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferLuma); - dml2_printf("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthChroma[k]); - dml2_printf("DML::%s: k=%u 
MaximumSwathWidthSupportChroma=%u\n", __func__, k, s->MaximumSwathWidthSupportChroma); - dml2_printf("DML::%s: k=%u MaximumSwathWidthInLineBufferChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferChroma); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportChroma=%u\n", __func__, k, s->MaximumSwathWidthSupportChroma); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferChroma); } /* Cursor Support Check */ @@ -7672,11 +8228,11 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.AlignedCPitch[k] > display_cfg->plane_descriptors[k].surface.plane1.pitch) { mode_lib->ms.support.PitchSupport = false; #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]); - dml2_printf("DML::%s: k=%u PitchY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch); - dml2_printf("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]); - dml2_printf("DML::%s: k=%u PitchC = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch); - dml2_printf("DML::%s: k=%u PitchSupport = %d\n", __func__, k, mode_lib->ms.support.PitchSupport); + DML_LOG_VERBOSE("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u PitchY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch); + DML_LOG_VERBOSE("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u PitchC = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch); + DML_LOG_VERBOSE("DML::%s: k=%u PitchSupport = %d\n", __func__, k, mode_lib->ms.support.PitchSupport); #endif } @@ -7708,11 +8264,11 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) { mode_lib->ms.support.ViewportExceedsSurface = true; #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: k=%u ViewportWidth = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); - dml2_printf("DML::%s: k=%u SurfaceWidthY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width); - dml2_printf("DML::%s: k=%u ViewportHeight = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); - dml2_printf("DML::%s: k=%u SurfaceHeightY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height); - dml2_printf("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); + DML_LOG_VERBOSE("DML::%s: k=%u SurfaceWidthY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); + DML_LOG_VERBOSE("DML::%s: k=%u SurfaceHeightY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height); + DML_LOG_VERBOSE("DML::%s: k=%u 
ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface); #endif } if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { @@ -7894,8 +8450,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPDSC; } #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]); - dml2_printf("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); + DML_LOG_VERBOSE("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]); + DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); #endif // ensure the number dsc slices is integer multiple based on ODM mode @@ -7911,9 +8467,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 4) == 0); #if defined(__DML_VBA_DEBUG__) if (!mode_lib->ms.support.DSCSlicesODMModeSupported) { - dml2_printf("DML::%s: k=%d Invalid dsc num_slices and ODM mode setting\n", __func__, k); - dml2_printf("DML::%s: k=%d num_slices = %d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices); - dml2_printf("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); + DML_LOG_VERBOSE("DML::%s: k=%d Invalid dsc num_slices and ODM mode setting\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%d num_slices = %d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices); + DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); } #endif } else { @@ -7958,7 +8514,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.MPCCombine[k] = false; mode_lib->ms.NoOfDPP[k] = 1; if (!mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) { - dml2_printf("WARNING: DML::%s: MPCC is override to disable but viewport is too large to be supported with single pipe!\n", __func__); + DML_LOG_VERBOSE("WARNING: DML::%s: MPCC is override to disable but viewport is too large to be supported with single pipe!\n", __func__); } } else { if ((mode_lib->ms.MinDPPCLKUsingSingleDPP[k] > mode_lib->ms.max_dppclk_freq_mhz) || !mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) { @@ -7968,7 +8524,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } } #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]); #endif } @@ -8138,7 +8694,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout); - if (mode_lib->ms.RequiredDTBCLK[k] > ((double)min_clk_table->max_clocks_khz.dtbclk / 1000)) { + if (mode_lib->ms.RequiredDTBCLK[k] > ((double)min_clk_table->max_ss_clocks_khz.dtbclk / 1000)) { mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true; } } else { @@ -8167,7 +8723,7 @@ static bool 
dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out s->DSCFormatFactor = 1; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]); #endif if (mode_lib->ms.RequiresDSC[k] == true) { s->PixelClockBackEndFactor = 3.0; @@ -8185,10 +8741,10 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]); - dml2_printf("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]); - dml2_printf("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor); - dml2_printf("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", __func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor); + DML_LOG_VERBOSE("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", __func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported); #endif } } @@ -8423,13 +8979,13 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = false; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]); - dml2_printf("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]); #endif } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded); - dml2_printf("DML::%s: DCCMetaBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded); + DML_LOG_VERBOSE("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded); + DML_LOG_VERBOSE("DML::%s: DCCMetaBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded); #endif /* VActive bytes to fetch for UCLK P-State */ @@ -8502,7 +9058,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); - bool cursor_not_enough_urgent_latency_hiding = 0; + bool cursor_not_enough_urgent_latency_hiding = false; if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { calculate_cursor_req_attributes( @@ -8531,9 +9087,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k]; #ifdef __DML_VBA_DEBUG__ - 
dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor\n", __func__, k); - dml2_printf("DML::%s: k=%d, VRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); - dml2_printf("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); #endif CalculateUrgentBurstFactor( @@ -8605,7 +9161,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]); #endif /* Immediate Flip and MALL parameters */ @@ -8654,16 +9210,15 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod); - dml2_printf("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod); - dml2_printf("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod); - dml2_printf("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz); - dml2_printf("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState); - dml2_printf("DML::%s: in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index); - dml2_printf("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); - dml2_printf("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); - dml2_printf("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); - dml2_printf("DML::%s: urgent latency tolarance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes))); + DML_LOG_VERBOSE("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod); + DML_LOG_VERBOSE("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod); + DML_LOG_VERBOSE("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod); + DML_LOG_VERBOSE("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz); + DML_LOG_VERBOSE("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState); + DML_LOG_VERBOSE("DML::%s: in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index); + DML_LOG_VERBOSE("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); + DML_LOG_VERBOSE("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); + DML_LOG_VERBOSE("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); #endif 
mode_lib->ms.support.OutstandingRequestsSupport = true; @@ -8703,10 +9258,10 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: avg_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_urgent_latency_us); - dml2_printf("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us); - dml2_printf("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]); - dml2_printf("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us); + DML_LOG_VERBOSE("DML::%s: avg_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_urgent_latency_us); + DML_LOG_VERBOSE("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us); + DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us); #endif } @@ -8722,8 +9277,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]); - dml2_printf("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us); + DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us); #endif } } @@ -8869,7 +9424,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { if (mode_lib->ms.NotEnoughUrgentLatencyHiding[k]) { mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = false; - dml2_printf("DML::%s: k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k); } } @@ -8878,639 +9433,13 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out if (!mode_lib->ms.support.avg_bandwidth_support_ok[m][n] && (m == dml2_core_internal_soc_state_sys_active || mode_lib->soc.mall_allocated_for_dcn_mbytes > 0)) { mode_lib->ms.support.AvgBandwidthSupport = false; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n)); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n)); #endif } } } - /* Prefetch Check */ - { - mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep; - - calculate_hostvm_inefficiency_factor( - &s->HostVMInefficiencyFactor, - &s->HostVMInefficiencyFactorPrefetch, - - display_cfg->gpuvm_enable, - display_cfg->hostvm_enable, - mode_lib->ip.remote_iommu_outstanding_translations, - mode_lib->soc.max_outstanding_reqs, - mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active], - mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]); - - mode_lib->ms.Total3dlutActive = 0; - for (k = 0; k <= 
mode_lib->ms.num_active_planes - 1; k++) { - if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) - mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1; - - // Calculate tdlut schedule related terms - calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK; - calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; - calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode; - calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode; - calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size; - calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; - calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; - calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag; - calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling); - - // output - calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k]; - calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k]; - calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k]; - calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k]; - calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k]; - calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k]; - calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k]; - - calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); - } - - min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; - - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) - s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); - - CalculateExtraLatency( - display_cfg, - mode_lib->ip.rob_buffer_size_kbytes, - mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, - s->ReorderingBytes, - mode_lib->ms.DCFCLK, - mode_lib->ms.FabricClock, - mode_lib->ip.pixel_chunk_size_kbytes, - min_return_bw_for_latency, - mode_lib->ms.num_active_planes, - mode_lib->ms.NoOfDPP, - mode_lib->ms.dpte_group_bytes, - s->tdlut_bytes_per_group, - s->HostVMInefficiencyFactor, - s->HostVMInefficiencyFactorPrefetch, - mode_lib->soc.hostvm_min_page_size_kbytes, - mode_lib->soc.qos_parameters.qos_type, - !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable), - mode_lib->soc.max_outstanding_reqs, - mode_lib->ms.support.request_size_bytes_luma, - mode_lib->ms.support.request_size_bytes_chroma, - mode_lib->ip.meta_chunk_size_kbytes, - mode_lib->ip.dchub_arb_to_ret_delay, - mode_lib->ms.TripToMemory, - mode_lib->ip.hostvm_mode, - - // output - &mode_lib->ms.ExtraLatency, - &mode_lib->ms.ExtraLatency_sr, - &mode_lib->ms.ExtraLatencyPrefetch); - - for 
(k = 0; k < mode_lib->ms.num_active_planes; k++) - s->impacted_dst_y_pre[k] = 0; - - s->recalc_prefetch_schedule = 0; - s->recalc_prefetch_done = 0; - do { - mode_lib->ms.support.PrefetchSupported = true; - - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); - s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format; - - s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, - mode_lib->ms.NoOfDPP[k], - display_cfg->plane_descriptors[k].composition.viewport.plane0.width, - display_cfg->plane_descriptors[k].composition.viewport.plane0.height, - display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, - display_cfg->plane_descriptors[k].composition.rotation_angle); - - s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, - mode_lib->ms.NoOfDPP[k], - display_cfg->plane_descriptors[k].composition.viewport.plane1.width, - display_cfg->plane_descriptors[k].composition.viewport.plane1.height, - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, - display_cfg->plane_descriptors[k].composition.rotation_angle); - - struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe; - - mode_lib->ms.TWait[k] = CalculateTWait( - display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, - mode_lib->ms.UrgLatency, - mode_lib->ms.TripToMemory, - !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? 
- get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); - - myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k]; - myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK; - myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); - myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; - myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k]; - myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled; - myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; - myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; - myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; - myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; - myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; - myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored; - myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; - myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; - myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; - myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; - myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; - myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors; - myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active; - myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; - myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active; - myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; - myPipe->ODMMode = mode_lib->ms.ODMMode[k]; - myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; - myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; - myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; - myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; - -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); - dml2_printf("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]); -#endif - CalculatePrefetchSchedule_params->display_cfg = display_cfg; - CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch; - CalculatePrefetchSchedule_params->myPipe = myPipe; - CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k]; - CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter; - CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl; - CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only; - CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor; - CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal; - CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned 
int)(mode_lib->ms.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); - CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; - CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; - CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k]; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; - CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; - CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; - CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required; - CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes; - CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency; - CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch; - CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc; - CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k]; - CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k]; - CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k]; - CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k]; - CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k]; - CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k]; - CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k]; - CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k]; - CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub[k]; - CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k]; - CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k]; - CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k]; - CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k]; - CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory; - CalculatePrefetchSchedule_params->Turg = mode_lib->ms.UrgLatency; - CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; - CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k]; - CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k]; - CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k]; - CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k]; - CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0); - CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k]; - CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k]; - CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; - CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present; - CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k]; - 
CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k]; - CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k]; - CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->ms.vactive_sw_bw_l[k]; - CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->ms.vactive_sw_bw_c[k]; - - // output - CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k]; - CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k]; - CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k]; - CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k]; - CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k]; - CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k]; - CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k]; - CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l - CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c - CalculatePrefetchSchedule_params->RequiredPrefetchBWOTO = &mode_lib->ms.RequiredPrefetchBWOTO[k]; - CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k]; - CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; - CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k]; - CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k]; - CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0]; - CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1]; - CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2]; - CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k]; - CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k]; - CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k]; - CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k]; - CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k]; - CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k]; - CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0]; - CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1]; - CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2]; - CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k]; - CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k]; - CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k]; - CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k]; - CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k]; - - mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); - - mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k]; - dml2_printf("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank); - dml2_printf("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank); - } // for k num_planes - - CalculateDCFCLKDeepSleepTdlut( - 
display_cfg, - mode_lib->ms.num_active_planes, - mode_lib->ms.BytePerPixelY, - mode_lib->ms.BytePerPixelC, - mode_lib->ms.SwathWidthY, - mode_lib->ms.SwathWidthC, - mode_lib->ms.NoOfDPP, - mode_lib->ms.PSCL_FACTOR, - mode_lib->ms.PSCL_FACTOR_CHROMA, - mode_lib->ms.RequiredDPPCLK, - mode_lib->ms.vactive_sw_bw_l, - mode_lib->ms.vactive_sw_bw_c, - mode_lib->soc.return_bus_width_bytes, - mode_lib->ms.RequiredDISPCLK, - s->tdlut_bytes_to_deliver, - s->prefetch_swath_time_us, - - /* Output */ - &mode_lib->ms.dcfclk_deepsleep); - - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (mode_lib->ms.dst_y_prefetch[k] < 2.0 - || mode_lib->ms.LinesForVM[k] >= 32.0 - || mode_lib->ms.LinesForDPTERow[k] >= 16.0 - || mode_lib->ms.NoTimeForPrefetch[k] == true - || s->DSTYAfterScaler[k] > 8) { - mode_lib->ms.support.PrefetchSupported = false; - dml2_printf("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]); - dml2_printf("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]); - dml2_printf("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]); - dml2_printf("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]); - dml2_printf("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); - } - } - - mode_lib->ms.support.DynamicMetadataSupported = true; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) { - mode_lib->ms.support.DynamicMetadataSupported = false; - } - } - - mode_lib->ms.support.VRatioInPrefetchSupported = true; - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || - mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { - mode_lib->ms.support.VRatioInPrefetchSupported = false; - dml2_printf("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__); - dml2_printf("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__); - dml2_printf("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported); - } - } - - mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.VRatioInPrefetchSupported; - - // By default, do not recalc prefetch schedule - s->recalc_prefetch_schedule = 0; - - // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok - if (mode_lib->ms.support.PrefetchSupported) { - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - // Calculate Urgent burst factor for prefetch -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k); - dml2_printf("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]); - dml2_printf("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]); -#endif - CalculateUrgentBurstFactor( - &display_cfg->plane_descriptors[k], - mode_lib->ms.swath_width_luma_ub[k], - mode_lib->ms.swath_width_chroma_ub[k], - mode_lib->ms.SwathHeightY[k], - mode_lib->ms.SwathHeightC[k], - s->line_times[k], - mode_lib->ms.UrgLatency, - mode_lib->ms.VRatioPreY[k], - mode_lib->ms.VRatioPreC[k], - mode_lib->ms.BytePerPixelInDETY[k], - mode_lib->ms.BytePerPixelInDETC[k], - 
mode_lib->ms.DETBufferSizeY[k], - mode_lib->ms.DETBufferSizeC[k], - /* Output */ - &mode_lib->ms.UrgentBurstFactorLumaPre[k], - &mode_lib->ms.UrgentBurstFactorChromaPre[k], - &mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); - } - - // Calculate urgent bandwidth required, both urg and non urg peak bandwidth - // assume flip bw is 0 at this point - for (k = 0; k < mode_lib->ms.num_active_planes; k++) - mode_lib->ms.final_flip_bw[k] = 0; - - calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->ms.support.urg_vactive_bandwidth_required; - calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required; - calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->ms.support.urg_bandwidth_required_qual; - calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required; - calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = mode_lib->ms.surface_avg_vactive_required_bw; - calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw; - - calculate_peak_bandwidth_params->display_cfg = display_cfg; - calculate_peak_bandwidth_params->inc_flip_bw = 0; - calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes; - calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; - calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; - calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; - - calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; - calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; - calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; - calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; - calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; - calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; - calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; - calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; - calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; - calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; - calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; - calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; - calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; - calculate_peak_bandwidth_params->urgent_burst_factor_cursor = 
mode_lib->ms.UrgentBurstFactorCursor; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; - - calculate_peak_bandwidth_required( - &mode_lib->scratch, - calculate_peak_bandwidth_params); - - // Check urg peak bandwidth against available urg bw - // check at SDP and DRAM, for all soc states (SVP prefetch an Sys Active) - check_urgent_bandwidth_support( - &s->dummy_single[0], // double* frac_urg_bandwidth - &s->dummy_single[1], // double* frac_urg_bandwidth_mall - &mode_lib->ms.support.UrgVactiveBandwidthSupport, - &mode_lib->ms.support.PrefetchBandwidthSupported, - - mode_lib->soc.mall_allocated_for_dcn_mbytes, - mode_lib->ms.support.non_urg_bandwidth_required, - mode_lib->ms.support.urg_vactive_bandwidth_required, - mode_lib->ms.support.urg_bandwidth_required, - mode_lib->ms.support.urg_bandwidth_available); - - mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported; - dml2_printf("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported); - - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) { - mode_lib->ms.support.PrefetchSupported = false; - dml2_printf("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); - } - } - -#ifdef DML_GLOBAL_PREFETCH_CHECK - if (mode_lib->ms.support.PrefetchSupported && mode_lib->ms.num_active_planes > 1 && s->recalc_prefetch_done == 0) { - CheckGlobalPrefetchAdmissibility_params->num_active_planes = mode_lib->ms.num_active_planes; - CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format; - CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024; - CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024; - CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l; - CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c; - CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->ms.SwathHeightY; - CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->ms.SwathHeightC; - CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; - CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->ms.CompressedBufferSizeInkByte; - CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->ms.DETBufferSizeY; - CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->ms.DETBufferSizeC; - CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l; - CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c; - CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes; - CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = s->Tpre_rounded; - CheckGlobalPrefetchAdmissibility_params->Tpre_oto = s->Tpre_oto; - CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->ms.support.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; - 
CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times; - CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->ms.dst_y_prefetch; - if (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps < 10 * 1024) - CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 10 * 1024; - - CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps / (double) mode_lib->soc.return_bus_width_bytes) / - ((double)mode_lib->soc.qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0); - - // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible - CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->recalc_prefetch_schedule; - CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre; - mode_lib->ms.support.PrefetchSupported = CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); - s->recalc_prefetch_done = 1; - s->recalc_prefetch_schedule = 1; - } -#endif - } // prefetch schedule ok, do urg bw and flip schedule - } while (s->recalc_prefetch_schedule); - - // Flip Schedule - // Both prefetch schedule and BW okay - if (mode_lib->ms.support.PrefetchSupported == true) { - mode_lib->ms.BandwidthAvailableForImmediateFlip = - get_bandwidth_available_for_immediate_flip( - dml2_core_internal_soc_state_sys_active, - mode_lib->ms.support.urg_bandwidth_required_qual, // no flip - mode_lib->ms.support.urg_bandwidth_available); - - mode_lib->ms.TotImmediateFlipBytes = 0; - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (display_cfg->plane_descriptors[k].immediate_flip) { - s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes( - s->HostVMInefficiencyFactor, - mode_lib->ms.vm_bytes[k], - mode_lib->ms.DPTEBytesPerRow[k], - mode_lib->ms.meta_row_bytes[k]); - } else { - s->per_pipe_flip_bytes[k] = 0; - } - mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k]; - - } - - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - CalculateFlipSchedule( - &mode_lib->scratch, - display_cfg->plane_descriptors[k].immediate_flip, - 1, // use_lb_flip_bw - s->HostVMInefficiencyFactor, - s->Tvm_trips_flip[k], - s->Tr0_trips_flip[k], - s->Tvm_trips_flip_rounded[k], - s->Tr0_trips_flip_rounded[k], - display_cfg->gpuvm_enable, - mode_lib->ms.vm_bytes[k], - mode_lib->ms.DPTEBytesPerRow[k], - mode_lib->ms.BandwidthAvailableForImmediateFlip, - mode_lib->ms.TotImmediateFlipBytes, - display_cfg->plane_descriptors[k].pixel_format, - (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), - display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, - mode_lib->ms.Tno_bw_flip[k], - mode_lib->ms.dpte_row_height[k], - mode_lib->ms.dpte_row_height_chroma[k], - mode_lib->ms.use_one_row_for_frame_flip[k], - mode_lib->ip.max_flip_time_us, - mode_lib->ip.max_flip_time_lines, - s->per_pipe_flip_bytes[k], - mode_lib->ms.meta_row_bytes[k], - s->meta_row_height_luma[k], - s->meta_row_height_chroma[k], - mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable, - - /* Output */ - 
&mode_lib->ms.dst_y_per_vm_flip[k], - &mode_lib->ms.dst_y_per_row_flip[k], - &mode_lib->ms.final_flip_bw[k], - &mode_lib->ms.ImmediateFlipSupportedForPipe[k]); - } - - calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw; - calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip; - calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw; - calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip; - calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw; - calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw; - - calculate_peak_bandwidth_params->display_cfg = display_cfg; - calculate_peak_bandwidth_params->inc_flip_bw = 1; - calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes; - calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; - calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; - calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; - - calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; - calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; - calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; - calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; - calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; - calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; - calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; - calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; - calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; - calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; - calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; - calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; - calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; - calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; - - 
calculate_peak_bandwidth_required( - &mode_lib->scratch, - calculate_peak_bandwidth_params); - - calculate_immediate_flip_bandwidth_support( - &s->dummy_single[0], // double* frac_urg_bandwidth_flip - &mode_lib->ms.support.ImmediateFlipSupport, - - dml2_core_internal_soc_state_sys_active, - mode_lib->ms.support.urg_bandwidth_required_flip, - mode_lib->ms.support.non_urg_bandwidth_required_flip, - mode_lib->ms.support.urg_bandwidth_available); - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false) - mode_lib->ms.support.ImmediateFlipSupport = false; - } - - } else { // if prefetch not support, assume iflip is not supported too - mode_lib->ms.support.ImmediateFlipSupport = false; - } - - s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency; - s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency; - s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr; - s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us; - s->mSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; - s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us; - s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; - s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us; - s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; - s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us; - s->mSOCParameters.USRRetrainingLatency = 0; - s->mSOCParameters.SMNLatency = 0; - s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index); - s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index); - s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; - s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; - - CalculateWatermarks_params->display_cfg = display_cfg; - CalculateWatermarks_params->USRRetrainingRequired = false; - CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; - CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines; - CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits; - CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes; - CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK; - CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; - CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change; - CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; - CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters; - CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes; - CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK; - 
CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; - CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY; - CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; - CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY; - CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC; - CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY; - CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC; - CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP; - CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY; - CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC; - CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler; - CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler; - CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled; - CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte; - CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma; - CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma; - - // Output - CalculateWatermarks_params->Watermark = &mode_lib->ms.support.watermarks; // Watermarks *Watermark - CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport; - CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported; - CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[] - CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[] - CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport; - CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported; - CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported - CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport; - CalculateWatermarks_params->g6_temp_read_support = &mode_lib->ms.support.g6_temp_read_support; - CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin; - CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs; - - CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); - - calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]); - } - dml2_printf("DML::%s: Done prefetch calculation\n", __func__); - // End of Prefetch Check + dml_core_ms_prefetch_check(mode_lib, display_cfg); mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us; @@ -9546,8 +9475,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.dram_change_vactive_det_fill_delay_us); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us); - dml2_printf("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport); + DML_LOG_VERBOSE("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us); + 
DML_LOG_VERBOSE("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport); #endif /*Mode Support, Voltage State and SOC Configuration*/ @@ -9597,17 +9526,17 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out && !mode_lib->ms.support.ExceededMALLSize && mode_lib->ms.support.g6_temp_read_support && ((!display_cfg->hostvm_enable && !s->ImmediateFlipRequired) || mode_lib->ms.support.ImmediateFlipSupport)) { - dml2_printf("DML::%s: mode is supported\n", __func__); + DML_LOG_VERBOSE("DML::%s: mode is supported\n", __func__); mode_lib->ms.support.ModeSupport = true; } else { - dml2_printf("DML::%s: mode is NOT supported\n", __func__); + DML_LOG_VERBOSE("DML::%s: mode is NOT supported\n", __func__); mode_lib->ms.support.ModeSupport = false; } } // Since now the mode_support work on 1 particular power state, so there is only 1 state idx (index 0). - dml2_printf("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport); - dml2_printf("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport); + DML_LOG_VERBOSE("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport); + DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport); for (k = 0; k < mode_lib->ms.num_active_planes; k++) { mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k]; @@ -9623,8 +9552,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRate[k]; #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]); - dml2_printf("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]); #endif } @@ -9632,7 +9561,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out if (!mode_lib->ms.support.ModeSupport) dml2_print_mode_support_info(&mode_lib->ms.support, true); - dml2_printf("DML::%s: --- DONE --- \n", __func__); + DML_LOG_VERBOSE("DML::%s: --- DONE --- \n", __func__); #endif return mode_lib->ms.support.ModeSupport; @@ -9642,18 +9571,18 @@ unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support { unsigned int result; - dml2_printf("DML::%s: ------------- START ----------\n", __func__); + DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__); result = dml_core_mode_support(in_out_params); if (result) *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support; - dml2_printf("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index); + DML_LOG_VERBOSE("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index); for (unsigned int k = 0; k < in_out_params->in_display_cfg->num_planes; k++) - dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); + DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); - dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); + DML_LOG_VERBOSE("DML::%s: 
------------- DONE ----------\n", __func__); return result; } @@ -9687,19 +9616,19 @@ static void CalculatePixelDeliveryTimes( double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u : HRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); - dml2_printf("DML::%s: k=%u : VRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); - dml2_printf("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio); - dml2_printf("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); - dml2_printf("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, k, VRatioPrefetchY[k]); - dml2_printf("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, k, VRatioPrefetchC[k]); - dml2_printf("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]); - dml2_printf("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]); - dml2_printf("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); - dml2_printf("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); - dml2_printf("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, cfg_support_info->plane_support_info[k].dpps_used); - dml2_printf("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, k, pixel_clock_mhz); - dml2_printf("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : HRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u : VRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, k, VRatioPrefetchY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, k, VRatioPrefetchC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, cfg_support_info->plane_support_info[k].dpps_used); + DML_LOG_VERBOSE("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, k, pixel_clock_mhz); + DML_LOG_VERBOSE("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]); #endif if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) { DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz; @@ -9733,10 +9662,10 @@ static void CalculatePixelDeliveryTimes( } } #ifdef __DML_VBA_DEBUG__ - 
dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); - dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); - dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); - dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); #endif } @@ -9752,12 +9681,12 @@ static void CalculatePixelDeliveryTimes( DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub_c[k]; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); - dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); - dml2_printf("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, k, req_per_swath_ub_l[k]); - dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); - dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); - dml2_printf("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, k, req_per_swath_ub_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, k, req_per_swath_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, k, req_per_swath_ub_c[k]); #endif } } @@ -9853,14 +9782,14 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]); - dml2_printf("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]); - dml2_printf("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]); - dml2_printf("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]); - dml2_printf("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]); - dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", 
__func__, k, p->TimePerChromaMetaChunkNominal[k]); - dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]); - dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", __func__, k, p->TimePerChromaMetaChunkNominal[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]); #endif } @@ -9881,7 +9810,7 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE else p->time_per_tdlut_group[k] = 0; - dml2_printf("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]); if (p->display_cfg->gpuvm_enable == true) { if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { @@ -9897,14 +9826,14 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE if (dpte_groups_per_row_luma_ub <= 2) { dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1; } - dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); - dml2_printf("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]); - dml2_printf("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]); - dml2_printf("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]); - dml2_printf("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, p->PixelPTEReqHeightY[k]); - dml2_printf("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); - dml2_printf("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma); - dml2_printf("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub); + DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, p->PixelPTEReqHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub); 
p->time_per_pte_group_nom_luma[k] = p->DST_Y_PER_PTE_ROW_NOM_L[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; p->time_per_pte_group_vblank_luma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; @@ -9928,9 +9857,9 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE if (dpte_groups_per_row_chroma_ub <= 2) { dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1; } - dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); - dml2_printf("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); - dml2_printf("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); p->time_per_pte_group_nom_chroma[k] = p->DST_Y_PER_PTE_ROW_NOM_C[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; p->time_per_pte_group_vblank_chroma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; @@ -9945,17 +9874,17 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE p->time_per_pte_group_flip_chroma[k] = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]); - dml2_printf("DML::%s: k=%u, dst_y_per_row_flip = %f\n", __func__, k, p->dst_y_per_row_flip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_flip = %f\n", __func__, k, p->dst_y_per_row_flip[k]); - dml2_printf("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]); - dml2_printf("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]); - dml2_printf("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]); - dml2_printf("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]); - dml2_printf("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]); - dml2_printf("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]); - dml2_printf("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]); - dml2_printf("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, 
time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]); #endif } } // CalculateMetaAndPTETimes @@ -9991,18 +9920,18 @@ static void CalculateVMGroupAndRequestTimes( double line_time; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); + DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); #endif for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); bool dcc_mrq_enable = display_cfg->plane_descriptors[k].surface.dcc.enable && mrq_present; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable); - dml2_printf("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]); - dml2_printf("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]); - dml2_printf("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]); - dml2_printf("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_l = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]); - dml2_printf("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_c = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable); + DML_LOG_VERBOSE("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_l = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_c = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]); #endif if (display_cfg->gpuvm_enable) { @@ -10071,13 +10000,13 @@ static void CalculateVMGroupAndRequestTimes( else TimePerVMRequestFlip[k] = 0.0; - dml2_printf("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]); - dml2_printf("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]); - dml2_printf("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time); - dml2_printf("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %f\n", __func__, k, num_group_per_lower_vm_stage_pref); - dml2_printf("DML::%s: k=%u, num_group_per_lower_vm_stage_flip = %f\n", __func__, k, num_group_per_lower_vm_stage_flip); - dml2_printf("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %f\n", __func__, k, num_req_per_lower_vm_stage_pref); - dml2_printf("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %f\n", __func__, k, num_req_per_lower_vm_stage_flip); + 
DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time); + DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %d\n", __func__, k, num_group_per_lower_vm_stage_pref); + DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_flip = %d\n", __func__, k, num_group_per_lower_vm_stage_flip); + DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %d\n", __func__, k, num_req_per_lower_vm_stage_pref); + DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %d\n", __func__, k, num_req_per_lower_vm_stage_flip); if (display_cfg->gpuvm_max_page_table_levels > 2) { TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; @@ -10094,10 +10023,10 @@ static void CalculateVMGroupAndRequestTimes( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); - dml2_printf("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); - dml2_printf("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); - dml2_printf("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); #endif } } @@ -10113,7 +10042,6 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc unsigned int SingleVTotal = 0; bool SameTiming = true; bool FoundCriticalSurface = false; - double LastZ8StutterPeriod = 0; memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals)); @@ -10127,9 +10055,9 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc } l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0, l->MaximumEffectiveCompressionLuma); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); - dml2_printf("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0); - dml2_printf("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, l->MaximumEffectiveCompressionLuma); + DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0); + DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, l->MaximumEffectiveCompressionLuma); #endif l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0; l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + 
p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0 / l->MaximumEffectiveCompressionLuma; @@ -10142,9 +10070,9 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc } l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1, l->MaximumEffectiveCompressionChroma); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]); - dml2_printf("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1); - dml2_printf("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma); + DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1); + DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma); #endif l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1; l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1 / l->MaximumEffectiveCompressionChroma; @@ -10160,19 +10088,19 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc l->AverageDCCZeroSizeFraction = l->TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled); - dml2_printf("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth); - dml2_printf("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth); - dml2_printf("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth); - dml2_printf("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma); - dml2_printf("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, l->MaximumEffectiveCompressionChroma); - dml2_printf("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); - dml2_printf("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction); + DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth); + DML_LOG_VERBOSE("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth); + DML_LOG_VERBOSE("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth); + DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma); + DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, 
l->MaximumEffectiveCompressionChroma); + DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction); - dml2_printf("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0); - dml2_printf("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs); - dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte); - dml2_printf("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0); + DML_LOG_VERBOSE("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte); + DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte); #endif if (l->AverageDCCZeroSizeFraction == 1) { l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth; @@ -10189,10 +10117,10 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); - dml2_printf("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)); - dml2_printf("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64)); - dml2_printf("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate)); + DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)); + DML_LOG_VERBOSE("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64)); + DML_LOG_VERBOSE("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate)); #endif } else { l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate, @@ -10200,16 +10128,16 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * (p->rob_alloc_compressed ? 
l->AverageDCCCompressionRate : 1.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); - dml2_printf("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate); #endif } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries); - dml2_printf("DML::%s: ZeroSizeBufferEntries = %u\n", __func__, p->ZeroSizeBufferEntries); - dml2_printf("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate); - dml2_printf("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); + DML_LOG_VERBOSE("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries); + DML_LOG_VERBOSE("DML::%s: ZeroSizeBufferEntries = %u\n", __func__, p->ZeroSizeBufferEntries); + DML_LOG_VERBOSE("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate); + DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); #endif *p->StutterPeriod = 0; @@ -10220,15 +10148,15 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc l->LinesInDETYRoundedDownToSwath = math_floor2(l->LinesInDETY, p->SwathHeightY[k]); l->DETBufferingTimeY = l->LinesInDETYRoundedDownToSwath * ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024); - dml2_printf("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); - dml2_printf("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); - dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); - dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth); - dml2_printf("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY); - dml2_printf("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath); - dml2_printf("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); - dml2_printf("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY); + DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024); + DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); + DML_LOG_VERBOSE("DML::%s: 
k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth); + DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY); + DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath); + DML_LOG_VERBOSE("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY); #endif if (!FoundCriticalSurface || l->DETBufferingTimeY < *p->StutterPeriod) { @@ -10248,17 +10176,17 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc l->SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface); - dml2_printf("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod); - dml2_printf("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface); - dml2_printf("DML::%s: k=%u, FrameTimeCriticalSurface= %f\n", __func__, k, l->FrameTimeCriticalSurface); - dml2_printf("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface); - dml2_printf("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface); - dml2_printf("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface); - dml2_printf("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface); - dml2_printf("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface); - dml2_printf("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface); - dml2_printf("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod); + DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, FrameTimeCriticalSurface= %f\n", __func__, k, l->FrameTimeCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface); #endif } } @@ -10276,14 +10204,14 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = math_min2(*p->StutterPeriod * p->TotalDataReadBandwidth, l->EffectiveCompressedBufferSize); #ifdef __DML_VBA_DEBUG__ - 
dml2_printf("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); - dml2_printf("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod * p->TotalDataReadBandwidth) / 1024.0); - dml2_printf("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); - dml2_printf("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024); - dml2_printf("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW); - dml2_printf("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth); - dml2_printf("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth); - dml2_printf("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK); + DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod * p->TotalDataReadBandwidth) / 1024.0); + DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); + DML_LOG_VERBOSE("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024); + DML_LOG_VERBOSE("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW); + DML_LOG_VERBOSE("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth); + DML_LOG_VERBOSE("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth); + DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK); #endif l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer @@ -10292,10 +10220,10 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc / math_min2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)); - dml2_printf("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64)); - dml2_printf("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW); - dml2_printf("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); + DML_LOG_VERBOSE("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 
1 : l->AverageDCCCompressionRate)); + DML_LOG_VERBOSE("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64)); + DML_LOG_VERBOSE("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW); + DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); #endif l->TotalActiveWriteback = 0; memset(l->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool)); @@ -10324,9 +10252,9 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc if (l->TotalActiveWriteback == 0) { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime); - dml2_printf("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time); - dml2_printf("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); + DML_LOG_VERBOSE("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime); + DML_LOG_VERBOSE("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time); + DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); #endif *p->StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitTime + l->StutterBurstTime) / *p->StutterPeriod) * 100; *p->Z8StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitZ8Time + l->StutterBurstTime) / *p->StutterPeriod) * 100; @@ -10339,11 +10267,11 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc *p->Z8NumberOfStutterBurstsPerFrame = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface); - dml2_printf("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); - dml2_printf("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank); - dml2_printf("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame); - dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); + DML_LOG_VERBOSE("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface); + DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); + DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank); + DML_LOG_VERBOSE("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame); + DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); #endif if (*p->StutterEfficiencyNotIncludingVBlank > 0) { @@ -10358,7 +10286,7 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc } if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) { - LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod; + //LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod; if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) { *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank; } else { @@ -10370,25 +10298,25 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: TotalNumberOfActiveOTG = %u\n", 
__func__, TotalNumberOfActiveOTG); - dml2_printf("DML::%s: SameTiming = %u\n", __func__, SameTiming); - dml2_printf("DML::%s: SynchronizeTimings = %u\n", __func__, p->SynchronizeTimings); - dml2_printf("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); - dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark); - dml2_printf("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); - dml2_printf("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); - dml2_printf("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency); - dml2_printf("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency); - dml2_printf("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); - dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); + DML_LOG_VERBOSE("DML::%s: TotalNumberOfActiveOTG = %u\n", __func__, TotalNumberOfActiveOTG); + DML_LOG_VERBOSE("DML::%s: SameTiming = %u\n", __func__, SameTiming); + DML_LOG_VERBOSE("DML::%s: SynchronizeTimings = %u\n", __func__, p->SynchronizeTimings); + DML_LOG_VERBOSE("DML::%s: LastZ8StutterPeriod = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod : 0); + DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark); + DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); + DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); + DML_LOG_VERBOSE("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency); + DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency); + DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); + DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); #endif *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface); - dml2_printf("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte); - dml2_printf("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface); + DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte); + DML_LOG_VERBOSE("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); #endif } @@ -10422,7 +10350,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex double max_uclk_mhz = 0; double min_return_latency_in_DCFCLK_cycles = 0; - dml2_printf("DML::%s: --- START --- \n", __func__); + DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__); memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch)); memset(&mode_lib->mp, 0, sizeof(struct dml2_core_internal_mode_program)); @@ -10444,13 +10372,13 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex for (k = 0; k < 
s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; - DML2_ASSERT(cfg_support_info->stream_support_info[stream_index].odms_used <= 4); - DML2_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 || + DML_ASSERT(cfg_support_info->stream_support_info[stream_index].odms_used <= 4); + DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 || cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2 || cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1); if (cfg_support_info->stream_support_info[stream_index].odms_used > 1) - DML2_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1); + DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1); switch (cfg_support_info->stream_support_info[stream_index].odms_used) { case (4): @@ -10476,51 +10404,51 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex for (k = 0; k < s->num_active_planes; ++k) { mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used; mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0; - DML2_ASSERT(mode_lib->mp.Dppclk[k] > 0); + DML_ASSERT(mode_lib->mp.Dppclk[k] > 0); } for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0; - dml2_printf("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); + DML_LOG_VERBOSE("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); } mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0; mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0; - DML2_ASSERT(mode_lib->mp.Dcfclk > 0); - DML2_ASSERT(mode_lib->mp.FabricClock > 0); - DML2_ASSERT(mode_lib->mp.dram_bw_mbps > 0); - DML2_ASSERT(mode_lib->mp.uclk_freq_mhz > 0); - DML2_ASSERT(mode_lib->mp.GlobalDPPCLK > 0); - DML2_ASSERT(mode_lib->mp.Dispclk > 0); - DML2_ASSERT(mode_lib->mp.DCFCLKDeepSleep > 0); - DML2_ASSERT(s->SOCCLK > 0); - -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes); - dml2_printf("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes); - dml2_printf("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk); - dml2_printf("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock); - dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->mp.dram_bw_mbps); - dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz); - dml2_printf("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk); + DML_ASSERT(mode_lib->mp.Dcfclk > 0); + DML_ASSERT(mode_lib->mp.FabricClock > 0); + DML_ASSERT(mode_lib->mp.dram_bw_mbps > 0); + DML_ASSERT(mode_lib->mp.uclk_freq_mhz > 0); + DML_ASSERT(mode_lib->mp.GlobalDPPCLK > 0); + DML_ASSERT(mode_lib->mp.Dispclk > 0); + DML_ASSERT(mode_lib->mp.DCFCLKDeepSleep > 0); + DML_ASSERT(s->SOCCLK > 0); + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes); + DML_LOG_VERBOSE("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes); + 
DML_LOG_VERBOSE("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk); + DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock); + DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->mp.dram_bw_mbps); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk); for (k = 0; k < s->num_active_planes; ++k) { - dml2_printf("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]); - } - dml2_printf("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK); - dml2_printf("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep); - dml2_printf("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK); - dml2_printf("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); - dml2_printf("DML::%s: min_clk_table min_fclk_khz = %d\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz); - dml2_printf("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config)); + DML_LOG_VERBOSE("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]); + } + DML_LOG_VERBOSE("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK); + DML_LOG_VERBOSE("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep); + DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK); + DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); + DML_LOG_VERBOSE("DML::%s: min_clk_table min_fclk_khz = %ld\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz); + DML_LOG_VERBOSE("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config)); for (k = 0; k < mode_lib->mp.num_active_pipes; ++k) { - dml2_printf("DML::%s: pipe=%d is in plane=%d\n", __func__, k, mode_lib->mp.pipe_plane[k]); - dml2_printf("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: pipe=%d is in plane=%d\n", __func__, k, mode_lib->mp.pipe_plane[k]); + DML_LOG_VERBOSE("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]); } for (k = 0; k < s->num_active_planes; k++) - dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); + DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); #endif CalculateMaxDETAndMinCompressedBufferSize( @@ -10617,8 +10545,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); mode_lib->mp.vactive_sw_bw_l[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / 
((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; mode_lib->mp.vactive_sw_bw_c[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; - dml2_printf("DML::%s: vactive_sw_bw_l[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); - dml2_printf("DML::%s: vactive_sw_bw_c[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); + DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); } CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg; @@ -11097,7 +11025,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); for (k = 0; k < s->num_active_planes; ++k) { - bool cursor_not_enough_urgent_latency_hiding = 0; + bool cursor_not_enough_urgent_latency_hiding = false; s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); @@ -11173,8 +11101,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.WritebackDelay[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); - dml2_printf("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, mode_lib->mp.WritebackDelay[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + DML_LOG_VERBOSE("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, mode_lib->mp.WritebackDelay[k]); #endif } @@ -11183,7 +11111,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex s->immediate_flip_required = s->immediate_flip_required || display_cfg->plane_descriptors[k].immediate_flip; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required); + DML_LOG_VERBOSE("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required); #endif if (s->num_active_planes > 1) { @@ -11219,12 +11147,12 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex s->DestinationLineTimesForPrefetchLessThan2 = false; s->VRatioPrefetchMoreThanMax = false; - dml2_printf("DML::%s: Start one iteration of prefetch schedule evaluation\n", __func__); + DML_LOG_VERBOSE("DML::%s: Start one iteration of prefetch schedule evaluation\n", __func__); for (k = 0; k < s->num_active_planes; ++k) { struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe; - dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); mode_lib->mp.TWait[k] = CalculateTWait( display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, mode_lib->mp.UrgentLatency, @@ -11261,7 +11189,7 @@ static bool 
dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); #endif CalculatePrefetchSchedule_params->display_cfg = display_cfg; CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch; @@ -11356,7 +11284,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.impacted_prefetch_margin_us[k] = 0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); #endif mode_lib->mp.VStartupMin[k] = s->MaxVStartupLines[k]; } // for k @@ -11366,9 +11294,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex if (mode_lib->mp.NoTimeToPrefetch[k] == true || mode_lib->mp.NotEnoughTimeForDynamicMetadata[k] || mode_lib->mp.DSTYAfterScaler[k] > 8) { - dml2_printf("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); - dml2_printf("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]); - dml2_printf("DML::%s: k=%u, DSTYAfterScaler=%u (should be <= 0)\n", __func__, k, mode_lib->mp.DSTYAfterScaler[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DSTYAfterScaler=%u (should be <= 0)\n", __func__, k, mode_lib->mp.DSTYAfterScaler[k]); mode_lib->mp.PrefetchModeSupported = false; } if (mode_lib->mp.dst_y_prefetch[k] < 2) @@ -11377,24 +11305,24 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { s->VRatioPrefetchMoreThanMax = true; - dml2_printf("DML::%s: k=%d, VRatioPrefetchY=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__); - dml2_printf("DML::%s: k=%d, VRatioPrefetchC=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__); - dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchY=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchC=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__); + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); } if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) { - dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); mode_lib->mp.PrefetchModeSupported = false; } } if (s->VRatioPrefetchMoreThanMax == true || 
s->DestinationLineTimesForPrefetchLessThan2 == true) { - dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); - dml2_printf("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2); + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); + DML_LOG_VERBOSE("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2); mode_lib->mp.PrefetchModeSupported = false; } - dml2_printf("DML::%s: Prefetch schedule is %sOK at vstartup = %u\n", __func__, + DML_LOG_VERBOSE("DML::%s: Prefetch schedule is %sOK at vstartup = %u\n", __func__, mode_lib->mp.PrefetchModeSupported ? "" : "NOT ", CalculatePrefetchSchedule_params->VStartup); // Prefetch schedule OK, now check prefetch bw @@ -11422,24 +11350,24 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex &mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->mp.NoOfDPP[k]); - dml2_printf("DML::%s: k=%0u UrgentBurstFactorLuma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLuma[k]); - dml2_printf("DML::%s: k=%0u UrgentBurstFactorChroma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChroma[k]); - dml2_printf("DML::%s: k=%0u UrgentBurstFactorLumaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLumaPre[k]); - dml2_printf("DML::%s: k=%0u UrgentBurstFactorChromaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChromaPre[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->mp.NoOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLuma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChroma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLumaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLumaPre[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChromaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChromaPre[k]); - dml2_printf("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k]); - dml2_printf("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); - dml2_printf("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]); - dml2_printf("DML::%s: k=%0u vactive_sw_bw_l=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); - dml2_printf("DML::%s: k=%0u vactive_sw_bw_c=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); - dml2_printf("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]); - dml2_printf("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]); - dml2_printf("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]); - dml2_printf("DML::%s: k=%0u RequiredPrefetchPixelDataBWLuma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]); - dml2_printf("DML::%s: k=%0u RequiredPrefetchPixelDataBWChroma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]); - dml2_printf("DML::%s: k=%0u 
prefetch_cursor_bw=%f\n", __func__, k, mode_lib->mp.prefetch_cursor_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_l=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_c=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWLuma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWChroma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_cursor_bw=%f\n", __func__, k, mode_lib->mp.prefetch_cursor_bw[k]); #endif } @@ -11503,11 +11431,11 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.urg_bandwidth_available); if (!mode_lib->mp.PrefetchModeSupported) - dml2_printf("DML::%s: Bandwidth not sufficient for prefetch!\n", __func__); + DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for prefetch!\n", __func__); for (k = 0; k < s->num_active_planes; ++k) { if (mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]) { - dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHidingPre = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHidingPre = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]); mode_lib->mp.PrefetchModeSupported = false; } } @@ -11533,12 +11461,12 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex } mode_lib->mp.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->mp.NoOfDPP[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k = %u\n", __func__, k); - dml2_printf("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->mp.NoOfDPP[k]); - dml2_printf("DML::%s: vm_bytes = %u\n", __func__, mode_lib->mp.vm_bytes[k]); - dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, mode_lib->mp.PixelPTEBytesPerRow[k]); - dml2_printf("DML::%s: meta_row_bytes = %u\n", __func__, mode_lib->mp.meta_row_bytes[k]); - dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, mode_lib->mp.TotImmediateFlipBytes); + DML_LOG_VERBOSE("DML::%s: k = %u\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->mp.NoOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, mode_lib->mp.vm_bytes[k]); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, mode_lib->mp.PixelPTEBytesPerRow[k]); + DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %u\n", __func__, mode_lib->mp.meta_row_bytes[k]); + DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, mode_lib->mp.TotImmediateFlipBytes); #endif } for (k = 0; k < s->num_active_planes; ++k) { @@ -11631,13 +11559,13 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.urg_bandwidth_available); if (!mode_lib->mp.ImmediateFlipSupported) - dml2_printf("DML::%s: Bandwidth not sufficient for flip!", __func__); + DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for flip!", __func__); for (k = 0; k < s->num_active_planes; ++k) { if 
(display_cfg->plane_descriptors[k].immediate_flip && mode_lib->mp.ImmediateFlipSupportedForPipe[k] == false) { mode_lib->mp.ImmediateFlipSupported = false; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Pipe %0d not supporting iflip!\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: Pipe %0d not supporting iflip!\n", __func__, k); #endif } } @@ -11650,28 +11578,28 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.PrefetchAndImmediateFlipSupported = (mode_lib->mp.PrefetchModeSupported == true && (!must_support_iflip || mode_lib->mp.ImmediateFlipSupported)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: PrefetchModeSupported = %u\n", __func__, mode_lib->mp.PrefetchModeSupported); + DML_LOG_VERBOSE("DML::%s: PrefetchModeSupported = %u\n", __func__, mode_lib->mp.PrefetchModeSupported); for (k = 0; k < s->num_active_planes; ++k) - dml2_printf("DML::%s: immediate_flip_required[%u] = %u\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); - dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, display_cfg->hostvm_enable); - dml2_printf("DML::%s: ImmediateFlipSupported = %u\n", __func__, mode_lib->mp.ImmediateFlipSupported); - dml2_printf("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported); + DML_LOG_VERBOSE("DML::%s: immediate_flip_required[%u] = %u\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); + DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, display_cfg->hostvm_enable); + DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupported = %u\n", __func__, mode_lib->mp.ImmediateFlipSupported); + DML_LOG_VERBOSE("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported); #endif - dml2_printf("DML::%s: Done one iteration: k=%d, MaxVStartupLines=%u\n", __func__, k, s->MaxVStartupLines[k]); + DML_LOG_VERBOSE("DML::%s: Done one iteration: k=%d, MaxVStartupLines=%u\n", __func__, k, s->MaxVStartupLines[k]); } for (k = 0; k < s->num_active_planes; ++k) - dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); if (!mode_lib->mp.PrefetchAndImmediateFlipSupported) { - dml2_printf("DML::%s: Bad, Prefetch and flip scheduling solution NOT found!\n", __func__); + DML_LOG_VERBOSE("DML::%s: Bad, Prefetch and flip scheduling solution NOT found!\n", __func__); } else { - dml2_printf("DML::%s: Good, Prefetch and flip scheduling solution found\n", __func__); + DML_LOG_VERBOSE("DML::%s: Good, Prefetch and flip scheduling solution found\n", __func__); // DCC Configuration for (k = 0; k < s->num_active_planes; ++k) { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k); #endif CalculateDCCConfiguration( display_cfg->plane_descriptors[k].surface.dcc.enable, @@ -11780,8 +11708,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->mp.Watermark, mode_lib->mp.pstate_keepout_dst_lines); - dml2_printf("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index); - dml2_printf("DML::%s: DEBUG PixelClock = %d kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz)); + 
DML_LOG_VERBOSE("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index); + DML_LOG_VERBOSE("DML::%s: DEBUG PixelClock = %ld kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz)); //Display Pipeline Delivery Time in Prefetch, Groups CalculatePixelDeliveryTimes( @@ -11893,15 +11821,15 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TCalc + mode_lib->mp.MinTTUVBlank[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); #endif s->Tvstartup_margin = (s->MaxVStartupLines[k] - mode_lib->mp.VStartupMin[k]) * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.MinTTUVBlank[k] + s->Tvstartup_margin; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin); - dml2_printf("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); - dml2_printf("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin); + DML_LOG_VERBOSE("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); #endif mode_lib->mp.Tdmdl[k] = mode_lib->mp.Tdmdl[k] + s->Tvstartup_margin; @@ -11920,9 +11848,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex s->blank_lines_remaining = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active) - mode_lib->mp.VStartup[k]; if (s->blank_lines_remaining < 0) { - dml2_printf("ERROR: Vstartup is larger than vblank!?\n"); + DML_LOG_VERBOSE("ERROR: Vstartup is larger than vblank!?\n"); s->blank_lines_remaining = 0; - DML2_ASSERT(0); + DML_ASSERT(0); } mode_lib->mp.MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup; @@ -11936,18 +11864,18 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = false; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, mode_lib->mp.VStartup[k]); - dml2_printf("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, mode_lib->mp.VStartupMin[k]); - dml2_printf("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, mode_lib->mp.VUpdateOffsetPix[k]); - dml2_printf("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, mode_lib->mp.VUpdateWidthPix[k]); - dml2_printf("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, mode_lib->mp.VReadyOffsetPix[k]); - dml2_printf("DML::%s: k=%u, HTotal = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total); - dml2_printf("DML::%s: k=%u, VTotal = %u\n", __func__, k, 
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total); - dml2_printf("DML::%s: k=%u, VActive = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active); - dml2_printf("DML::%s: k=%u, VFrontPorch = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch); - dml2_printf("DML::%s: k=%u, TSetup = %f\n", __func__, k, mode_lib->mp.TSetup[k]); - dml2_printf("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, mode_lib->mp.MIN_DST_Y_NEXT_START[k]); - dml2_printf("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, mode_lib->mp.VStartup[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, mode_lib->mp.VStartupMin[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, mode_lib->mp.VUpdateOffsetPix[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, mode_lib->mp.VUpdateWidthPix[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, mode_lib->mp.VReadyOffsetPix[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, HTotal = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total); + DML_LOG_VERBOSE("DML::%s: k=%u, VTotal = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total); + DML_LOG_VERBOSE("DML::%s: k=%u, VActive = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active); + DML_LOG_VERBOSE("DML::%s: k=%u, VFrontPorch = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch); + DML_LOG_VERBOSE("DML::%s: k=%u, TSetup = %f\n", __func__, k, mode_lib->mp.TSetup[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, mode_lib->mp.MIN_DST_Y_NEXT_START[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]); #endif } @@ -11969,9 +11897,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex for (k = 0; k < s->num_active_planes; ++k) { mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.vactive_sw_bw_l[k] + mode_lib->mp.vactive_sw_bw_c[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth); - dml2_printf("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); - dml2_printf("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth); + DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); #endif } @@ -12051,28 +11979,28 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex min_return_latency_in_DCFCLK_cycles = (min_return_uclk_cycles / max_uclk_mhz + min_return_fclk_cycles / max_fclk_mhz) * hard_minimum_dcfclk_mhz; mode_lib->mp.min_return_latency_in_dcfclk = (unsigned 
int)min_return_latency_in_DCFCLK_cycles; mode_lib->mp.dcfclk_deep_sleep_hysteresis = (unsigned int)math_max2(32, (double)mode_lib->ip.pixel_chunk_size_kbytes * 1024 * 3 / 4 / 64 - min_return_latency_in_DCFCLK_cycles); - DML2_ASSERT(mode_lib->mp.dcfclk_deep_sleep_hysteresis < 256); + DML_ASSERT(mode_lib->mp.dcfclk_deep_sleep_hysteresis < 256); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: max_fclk_mhz = %f\n", __func__, max_fclk_mhz); - dml2_printf("DML::%s: max_uclk_mhz = %f\n", __func__, max_uclk_mhz); - dml2_printf("DML::%s: hard_minimum_dcfclk_mhz = %f\n", __func__, hard_minimum_dcfclk_mhz); - dml2_printf("DML::%s: min_return_uclk_cycles = %d\n", __func__, min_return_uclk_cycles); - dml2_printf("DML::%s: min_return_fclk_cycles = %d\n", __func__, min_return_fclk_cycles); - dml2_printf("DML::%s: min_return_latency_in_DCFCLK_cycles = %f\n", __func__, min_return_latency_in_DCFCLK_cycles); - dml2_printf("DML::%s: dcfclk_deep_sleep_hysteresis = %d \n", __func__, mode_lib->mp.dcfclk_deep_sleep_hysteresis); - dml2_printf("DML::%s: --- END --- \n", __func__); + DML_LOG_VERBOSE("DML::%s: max_fclk_mhz = %f\n", __func__, max_fclk_mhz); + DML_LOG_VERBOSE("DML::%s: max_uclk_mhz = %f\n", __func__, max_uclk_mhz); + DML_LOG_VERBOSE("DML::%s: hard_minimum_dcfclk_mhz = %f\n", __func__, hard_minimum_dcfclk_mhz); + DML_LOG_VERBOSE("DML::%s: min_return_uclk_cycles = %ld\n", __func__, min_return_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: min_return_fclk_cycles = %ld\n", __func__, min_return_fclk_cycles); + DML_LOG_VERBOSE("DML::%s: min_return_latency_in_DCFCLK_cycles = %f\n", __func__, min_return_latency_in_DCFCLK_cycles); + DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d \n", __func__, mode_lib->mp.dcfclk_deep_sleep_hysteresis); + DML_LOG_VERBOSE("DML::%s: --- END --- \n", __func__); #endif return (in_out_params->mode_lib->mp.PrefetchAndImmediateFlipSupported); } bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params) { - dml2_printf("DML::%s: ------------- START ----------\n", __func__); + DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__); bool result = dml_core_mode_programming(in_out_params); - dml2_printf("DML::%s: result = %0d\n", __func__, result); - dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); + DML_LOG_VERBOSE("DML::%s: result = %0d\n", __func__, result); + DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__); return result; } @@ -12130,16 +12058,16 @@ void dml2_core_calcs_get_dpte_row_height( unsigned int MacroTileHeight = is_plane1 ? MacroTileHeightC : MacroTileHeightY; unsigned int PTEBufferSizeInRequests = is_plane1 ? 
mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML: %s: is_plane1 = %u\n", __func__, is_plane1); - dml2_printf("DML: %s: BytePerPixel = %u\n", __func__, BytePerPixel); - dml2_printf("DML: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes); - dml2_printf("DML: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes); - dml2_printf("DML: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth); - dml2_printf("DML: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight); - dml2_printf("DML: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests); - dml2_printf("DML: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma); - dml2_printf("DML: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma); - dml2_printf("DML: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); + DML_LOG_VERBOSE("DML: %s: is_plane1 = %u\n", __func__, is_plane1); + DML_LOG_VERBOSE("DML: %s: BytePerPixel = %u\n", __func__, BytePerPixel); + DML_LOG_VERBOSE("DML: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes); + DML_LOG_VERBOSE("DML: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes); + DML_LOG_VERBOSE("DML: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth); + DML_LOG_VERBOSE("DML: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight); + DML_LOG_VERBOSE("DML: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests); + DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma); + DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma); + DML_LOG_VERBOSE("DML: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); #endif unsigned int dummy_integer[21]; @@ -12193,16 +12121,16 @@ void dml2_core_calcs_get_dpte_row_height( CalculateVMAndRowBytes(&mode_lib->scratch.calculate_vm_and_row_bytes_params); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML: %s: dpte_row_height = %u\n", __func__, *dpte_row_height); + DML_LOG_VERBOSE("DML: %s: dpte_row_height = %u\n", __func__, *dpte_row_height); #endif } static bool is_dual_plane(enum dml2_source_format_class source_format) { - bool ret_val = 0; + bool ret_val = false; if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha)) - ret_val = 1; + ret_val = true; return ret_val; } @@ -12220,6 +12148,8 @@ static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const wm_regs->fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); wm_regs->sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); wm_regs->sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); + wm_regs->sr_enter_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + wm_regs->sr_exit_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_freq_in_mhz); wm_regs->temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); wm_regs->uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); wm_regs->urgent = (int 
unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); @@ -12246,11 +12176,11 @@ void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs cursor_dlg_regs->dst_x_offset = (unsigned int) ((dst_x_offset > 0) ? dst_x_offset : 0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML_DLG::%s: cursor_x_position=%d\n", __func__, p->cursor_x_position); - dml2_printf("DML_DLG::%s: dlg_refclk_mhz=%f\n", __func__, p->dlg_refclk_mhz); - dml2_printf("DML_DLG::%s: pixel_rate_mhz=%f\n", __func__, p->pixel_rate_mhz); - dml2_printf("DML_DLG::%s: dst_x_offset=%d\n", __func__, dst_x_offset); - dml2_printf("DML_DLG::%s: dst_x_offset=%d (reg)\n", __func__, cursor_dlg_regs->dst_x_offset); + DML_LOG_VERBOSE("DML_DLG::%s: cursor_x_position=%d\n", __func__, p->cursor_x_position); + DML_LOG_VERBOSE("DML_DLG::%s: dlg_refclk_mhz=%f\n", __func__, p->dlg_refclk_mhz); + DML_LOG_VERBOSE("DML_DLG::%s: pixel_rate_mhz=%f\n", __func__, p->pixel_rate_mhz); + DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d\n", __func__, dst_x_offset); + DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d (reg)\n", __func__, cursor_dlg_regs->dst_x_offset); #endif cursor_dlg_regs->chunk_hdl_adjust = 3; @@ -12286,7 +12216,7 @@ static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, double stored_swath_c_bytes; bool is_phantom_pipe; - dml2_printf("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx); + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx); pixel_chunk_bytes = (unsigned int)(mode_lib->ip.pixel_chunk_size_kbytes * 1024); min_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.min_pixel_chunk_size_bytes); @@ -12329,19 +12259,19 @@ static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, if (sw_mode == dml2_sw_linear && display_cfg->gpuvm_enable) { unsigned int p0_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_l(mode_lib, pipe_idx)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML_DLG: %s: p0_pte_row_height_linear = %u\n", __func__, p0_pte_row_height_linear); + DML_LOG_VERBOSE("DML_DLG: %s: p0_pte_row_height_linear = %u\n", __func__, p0_pte_row_height_linear); #endif - DML2_ASSERT(p0_pte_row_height_linear >= 8); + DML_ASSERT(p0_pte_row_height_linear >= 8); rq_regs->rq_regs_l.pte_row_height_linear = math_log2_approx(p0_pte_row_height_linear) - 3; if (dual_plane) { unsigned int p1_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_c(mode_lib, pipe_idx)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML_DLG: %s: p1_pte_row_height_linear = %u\n", __func__, p1_pte_row_height_linear); + DML_LOG_VERBOSE("DML_DLG: %s: p1_pte_row_height_linear = %u\n", __func__, p1_pte_row_height_linear); #endif if (sw_mode == dml2_sw_linear) { - DML2_ASSERT(p1_pte_row_height_linear >= 8); + DML_ASSERT(p1_pte_row_height_linear >= 8); } rq_regs->rq_regs_c.pte_row_height_linear = math_log2_approx(p1_pte_row_height_linear) - 3; } @@ -12375,12 +12305,12 @@ static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) { detile_buf_plane1_addr = (unsigned int)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr); + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr); #endif } else { detile_buf_plane1_addr = (unsigned int)(dml_round_to_multiple((unsigned int)((2.0 * 
detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr); + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr); #endif } } @@ -12388,15 +12318,15 @@ static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, rq_regs->plane1_base_address = detile_buf_plane1_addr; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe); - dml2_printf("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes); - dml2_printf("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes); - dml2_printf("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes); - dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr); - dml2_printf("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address); + DML_LOG_VERBOSE("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe); + DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes); + DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes); + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes); + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr); + DML_LOG_VERBOSE("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address); #endif - //dml2_printf_rq_regs_st(rq_regs); - dml2_printf("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); + //DML_LOG_VERBOSE_rq_regs_st(rq_regs); + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); } static void rq_dlg_get_dlg_reg( @@ -12411,10 +12341,10 @@ static void rq_dlg_get_dlg_reg( memset(l, 0, sizeof(struct dml2_core_shared_rq_dlg_get_dlg_reg_locals)); - dml2_printf("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx); + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx); l->plane_idx = dml_get_plane_idx(mode_lib, pipe_idx); - DML2_ASSERT(l->plane_idx < DML2_MAX_PLANES); + DML_ASSERT(l->plane_idx < DML2_MAX_PLANES); l->source_format = dml2_444_8; l->odm_mode = dml2_odm_mode_bypass; @@ -12444,18 +12374,18 @@ static void rq_dlg_get_dlg_reg( l->pclk_freq_in_mhz = (double)l->timing->pixel_clock_khz / 1000; l->ref_freq_to_pix_freq = l->refclk_freq_in_mhz / l->pclk_freq_in_mhz; - dml2_printf("DML_DLG::%s: plane_idx = %d\n", __func__, l->plane_idx); - dml2_printf("DML_DLG: %s: htotal = %d\n", __func__, l->htotal); - dml2_printf("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, l->refclk_freq_in_mhz); - dml2_printf("DML_DLG: %s: dlg_ref_clk_mhz = %3.2f\n", __func__, display_cfg->overrides.hw.dlg_ref_clk_mhz); - dml2_printf("DML_DLG: %s: soc.refclk_mhz = %3.2f\n", __func__, mode_lib->soc.dchub_refclk_mhz); - dml2_printf("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, l->pclk_freq_in_mhz); - dml2_printf("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); - dml2_printf("DML_DLG: %s: interlaced = %d\n", __func__, l->interlaced); + DML_LOG_VERBOSE("DML_DLG::%s: plane_idx = %d\n", __func__, l->plane_idx); + DML_LOG_VERBOSE("DML_DLG: %s: htotal = %d\n", __func__, l->htotal); + DML_LOG_VERBOSE("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, 
l->refclk_freq_in_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: dlg_ref_clk_mhz = %3.2f\n", __func__, display_cfg->overrides.hw.dlg_ref_clk_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: soc.refclk_mhz = %d\n", __func__, mode_lib->soc.dchub_refclk_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, l->pclk_freq_in_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); + DML_LOG_VERBOSE("DML_DLG: %s: interlaced = %d\n", __func__, l->interlaced); - DML2_ASSERT(l->refclk_freq_in_mhz != 0); - DML2_ASSERT(l->pclk_freq_in_mhz != 0); - DML2_ASSERT(l->ref_freq_to_pix_freq < 4.0); + DML_ASSERT(l->refclk_freq_in_mhz != 0); + DML_ASSERT(l->pclk_freq_in_mhz != 0); + DML_ASSERT(l->ref_freq_to_pix_freq < 4.0); // Need to figure out which side of odm combine we're in // Assume the pipe instance under the same plane is in order @@ -12484,14 +12414,14 @@ static void rq_dlg_get_dlg_reg( l->pipe_idx_in_combine = pipe_idx - l->first_pipe_idx_in_plane; // DML assumes the pipes in the same plane will have continuous indexing (i.e. plane 0 use pipe 0, 1, and plane 1 uses pipe 2, 3, etc.) disp_dlg_regs->refcyc_h_blank_end = (unsigned int)(((double)l->hblank_end + (double)l->pipe_idx_in_combine * (double)l->hactive / (double)l->odm_combine_factor) * l->ref_freq_to_pix_freq); - dml2_printf("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx); - dml2_printf("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, l->first_pipe_idx_in_plane); - dml2_printf("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, l->pipe_idx_in_combine); - dml2_printf("DML_DLG: %s: odm_combine_factor = %d\n", __func__, l->odm_combine_factor); + DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx); + DML_LOG_VERBOSE("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, l->first_pipe_idx_in_plane); + DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, l->pipe_idx_in_combine); + DML_LOG_VERBOSE("DML_DLG: %s: odm_combine_factor = %d\n", __func__, l->odm_combine_factor); } - dml2_printf("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end); - DML2_ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)math_pow(2, 13)); disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int)(l->ref_freq_to_pix_freq * math_pow(2, 19)); disp_dlg_regs->refcyc_per_htotal = (unsigned int)(l->ref_freq_to_pix_freq * (double)l->htotal * math_pow(2, 8)); @@ -12500,20 +12430,20 @@ static void rq_dlg_get_dlg_reg( l->min_ttu_vblank = mode_lib->mp.MinTTUVBlank[mode_lib->mp.pipe_plane[pipe_idx]]; l->min_dst_y_next_start = (unsigned int)(mode_lib->mp.MIN_DST_Y_NEXT_START[mode_lib->mp.pipe_plane[pipe_idx]]); - dml2_printf("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, l->min_ttu_vblank); - dml2_printf("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, l->min_dst_y_next_start); - dml2_printf("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); + DML_LOG_VERBOSE("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, l->min_ttu_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, l->min_dst_y_next_start); + DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); l->vready_after_vcount0 = (unsigned 
int)(mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[mode_lib->mp.pipe_plane[pipe_idx]]); disp_dlg_regs->vready_after_vcount0 = l->vready_after_vcount0; - dml2_printf("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0); + DML_LOG_VERBOSE("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0); l->dst_x_after_scaler = (unsigned int)(mode_lib->mp.DSTXAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]); l->dst_y_after_scaler = (unsigned int)(mode_lib->mp.DSTYAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]); - dml2_printf("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, l->dst_x_after_scaler); - dml2_printf("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, l->dst_y_after_scaler); + DML_LOG_VERBOSE("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, l->dst_x_after_scaler); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, l->dst_y_after_scaler); l->dst_y_prefetch = mode_lib->mp.dst_y_prefetch[mode_lib->mp.pipe_plane[pipe_idx]]; l->dst_y_per_vm_vblank = mode_lib->mp.dst_y_per_vm_vblank[mode_lib->mp.pipe_plane[pipe_idx]]; @@ -12521,28 +12451,28 @@ static void rq_dlg_get_dlg_reg( l->dst_y_per_vm_flip = mode_lib->mp.dst_y_per_vm_flip[mode_lib->mp.pipe_plane[pipe_idx]]; l->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip[mode_lib->mp.pipe_plane[pipe_idx]]; - dml2_printf("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, l->dst_y_prefetch); - dml2_printf("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, l->dst_y_per_vm_flip); - dml2_printf("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, l->dst_y_per_row_flip); - dml2_printf("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, l->dst_y_per_vm_vblank); - dml2_printf("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, l->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, l->dst_y_prefetch); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, l->dst_y_per_vm_flip); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, l->dst_y_per_row_flip); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, l->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, l->dst_y_per_row_vblank); if (l->dst_y_prefetch > 0 && l->dst_y_per_vm_vblank > 0 && l->dst_y_per_row_vblank > 0) { - DML2_ASSERT(l->dst_y_prefetch > (l->dst_y_per_vm_vblank + l->dst_y_per_row_vblank)); + DML_ASSERT(l->dst_y_prefetch > (l->dst_y_per_vm_vblank + l->dst_y_per_row_vblank)); } l->vratio_pre_l = mode_lib->mp.VRatioPrefetchY[mode_lib->mp.pipe_plane[pipe_idx]]; l->vratio_pre_c = mode_lib->mp.VRatioPrefetchC[mode_lib->mp.pipe_plane[pipe_idx]]; - dml2_printf("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, l->vratio_pre_l); - dml2_printf("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, l->vratio_pre_c); + DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, l->vratio_pre_l); + DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, l->vratio_pre_c); // Active l->refcyc_per_line_delivery_pre_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; l->refcyc_per_line_delivery_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_l); - dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_l = 
%3.2f\n", __func__, l->refcyc_per_line_delivery_l); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_l); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_l); l->refcyc_per_line_delivery_pre_c = 0.0; l->refcyc_per_line_delivery_c = 0.0; @@ -12551,8 +12481,8 @@ static void rq_dlg_get_dlg_reg( l->refcyc_per_line_delivery_pre_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; l->refcyc_per_line_delivery_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_c); - dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_c); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_c); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_c); } disp_dlg_regs->refcyc_per_vm_dmdata = (unsigned int)(mode_lib->mp.Tdmdl_vm[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); @@ -12561,8 +12491,8 @@ static void rq_dlg_get_dlg_reg( l->refcyc_per_req_delivery_pre_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; l->refcyc_per_req_delivery_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_l); - dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_l); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_l); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_l); l->refcyc_per_req_delivery_pre_c = 0.0; l->refcyc_per_req_delivery_c = 0.0; @@ -12570,16 +12500,16 @@ static void rq_dlg_get_dlg_reg( l->refcyc_per_req_delivery_pre_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; l->refcyc_per_req_delivery_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_c); - dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_c); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_c); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_c); } // TTU - Cursor - DML2_ASSERT(display_cfg->plane_descriptors[l->plane_idx].cursor.num_cursors <= 1); + DML_ASSERT(display_cfg->plane_descriptors[l->plane_idx].cursor.num_cursors <= 1); // Assign to register structures disp_dlg_regs->min_dst_y_next_start = (unsigned int)((double)l->min_dst_y_next_start * math_pow(2, 2)); - DML2_ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)math_pow(2, 18)); + DML_ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)math_pow(2, 18)); disp_dlg_regs->dst_y_after_scaler = l->dst_y_after_scaler; // in terms of 
line disp_dlg_regs->refcyc_x_after_scaler = (unsigned int)((double)l->dst_x_after_scaler * l->ref_freq_to_pix_freq); // in terms of refclk @@ -12592,10 +12522,10 @@ static void rq_dlg_get_dlg_reg( disp_dlg_regs->vratio_prefetch = (unsigned int)(l->vratio_pre_l * math_pow(2, 19)); disp_dlg_regs->vratio_prefetch_c = (unsigned int)(l->vratio_pre_c * math_pow(2, 19)); - dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank); - dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); - dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); - dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(mode_lib->mp.TimePerVMGroupVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(mode_lib->mp.TimePerVMGroupFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); @@ -12662,11 +12592,11 @@ static void rq_dlg_get_dlg_reg( disp_ttu_regs->qos_ramp_disable_c = 0; disp_ttu_regs->min_ttu_vblank = (unsigned int)(l->min_ttu_vblank * l->refclk_freq_in_mhz); - // CHECK for HW registers' range, DML2_ASSERT or clamp - DML2_ASSERT(l->refcyc_per_req_delivery_pre_l < math_pow(2, 13)); - DML2_ASSERT(l->refcyc_per_req_delivery_l < math_pow(2, 13)); - DML2_ASSERT(l->refcyc_per_req_delivery_pre_c < math_pow(2, 13)); - DML2_ASSERT(l->refcyc_per_req_delivery_c < math_pow(2, 13)); + // CHECK for HW registers' range, DML_ASSERT or clamp + DML_ASSERT(l->refcyc_per_req_delivery_pre_l < math_pow(2, 13)); + DML_ASSERT(l->refcyc_per_req_delivery_l < math_pow(2, 13)); + DML_ASSERT(l->refcyc_per_req_delivery_pre_c < math_pow(2, 13)); + DML_ASSERT(l->refcyc_per_req_delivery_c < math_pow(2, 13)); if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)math_pow(2, 23)) disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(math_pow(2, 23) - 1); @@ -12680,16 +12610,16 @@ static void rq_dlg_get_dlg_reg( disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(math_pow(2, 23) - 1); - DML2_ASSERT(disp_dlg_regs->dst_y_after_scaler < (unsigned int)8); - DML2_ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->dst_y_after_scaler < (unsigned int)8); + DML_ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)math_pow(2, 13)); if (disp_dlg_regs->dst_y_per_pte_row_nom_l >= (unsigned int)math_pow(2, 17)) { - dml2_printf("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_L %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_l, (unsigned int)math_pow(2, 17) - 1); + DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_L %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_l, (unsigned int)math_pow(2, 17) - 1); 
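/*
 * Editor's sketch, not part of this patch: the warn-and-clamp above is one
 * instance of a pattern used throughout rq_dlg_get_dlg_reg() -- values bound
 * for fixed-width HW register fields are either DML_ASSERT()ed to fit or
 * clamped to the field maximum.  Assuming a field of "field_bits" bits
 * (e.g. 17 for the U15.2 DST_Y_PER_PTE_ROW_NOM_L encoding), the pattern is:
 *
 *	unsigned int field_max = (1u << field_bits) - 1;
 *
 *	if (val > field_max) {
 *		DML_LOG_VERBOSE("DML: %u exceeds register max %u, clamping\n",
 *				val, field_max);
 *		val = field_max;
 *	}
 *	DML_ASSERT(val <= field_max);
 *
 * "field_bits" and "val" are illustrative placeholders, not existing DML
 * identifiers.
 */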
l->dst_y_per_pte_row_nom_l = (unsigned int)math_pow(2, 17) - 1; } if (l->dual_plane) { if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int)math_pow(2, 17)) { - dml2_printf("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_C %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)math_pow(2, 17) - 1); + DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_C %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)math_pow(2, 17) - 1); l->dst_y_per_pte_row_nom_c = (unsigned int)math_pow(2, 17) - 1; } } @@ -12700,20 +12630,20 @@ static void rq_dlg_get_dlg_reg( if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int)math_pow(2, 23)) disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(math_pow(2, 23) - 1); } - DML2_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)math_pow(2, 13)); if (l->dual_plane) { - DML2_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)math_pow(2, 13)); } - DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)math_pow(2, 13)); - DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)math_pow(2, 13)); - DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)math_pow(2, 13)); - DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)math_pow(2, 13)); - DML2_ASSERT(disp_ttu_regs->qos_level_low_wm < (unsigned int)math_pow(2, 14)); - DML2_ASSERT(disp_ttu_regs->qos_level_high_wm < (unsigned int)math_pow(2, 14)); - DML2_ASSERT(disp_ttu_regs->min_ttu_vblank < (unsigned int)math_pow(2, 24)); + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_ttu_regs->qos_level_low_wm < (unsigned int)math_pow(2, 14)); + DML_ASSERT(disp_ttu_regs->qos_level_high_wm < (unsigned int)math_pow(2, 14)); + DML_ASSERT(disp_ttu_regs->min_ttu_vblank < (unsigned int)math_pow(2, 24)); - dml2_printf("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); } } @@ -12736,11 +12666,11 @@ static void rq_dlg_get_arb_params(const struct dml2_display_cfg *display_cfg, co arb_param->pstate_stall_threshold = (unsigned int)(mode_lib->ip_caps.fams2.max_allow_delay_us * refclk_freq_in_mhz); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding); - dml2_printf("DML::%s: sdpif_request_rate_limit = %d\n", __func__, arb_param->sdpif_request_rate_limit); - dml2_printf("DML::%s: compbuf_reserved_space_kbytes = %d\n", __func__, arb_param->compbuf_reserved_space_kbytes); - dml2_printf("DML::%s: allow_sdpif_rate_limit_when_cstate_req = %d\n", __func__, arb_param->allow_sdpif_rate_limit_when_cstate_req); - dml2_printf("DML::%s: dcfclk_deep_sleep_hysteresis = %d\n", __func__, arb_param->dcfclk_deep_sleep_hysteresis); + DML_LOG_VERBOSE("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding); + 
DML_LOG_VERBOSE("DML::%s: sdpif_request_rate_limit = %d\n", __func__, arb_param->sdpif_request_rate_limit); + DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_kbytes = %d\n", __func__, arb_param->compbuf_reserved_space_kbytes); + DML_LOG_VERBOSE("DML::%s: allow_sdpif_rate_limit_when_cstate_req = %d\n", __func__, arb_param->allow_sdpif_rate_limit_when_cstate_req); + DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d\n", __func__, arb_param->dcfclk_deep_sleep_hysteresis); #endif } @@ -13013,10 +12943,10 @@ void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *disp out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: subvp_fw_processing_delay_us = %d\n", __func__, mode_lib->ip.subvp_fw_processing_delay_us); - dml2_printf("DML::%s: subvp_pstate_allow_width_us = %d\n", __func__, mode_lib->ip.subvp_pstate_allow_width_us); - dml2_printf("DML::%s: subvp_swath_height_margin_lines = %d\n", __func__, mode_lib->ip.subvp_swath_height_margin_lines); - dml2_printf("DML::%s: vblank_reserved_time_us = %f\n", __func__, out->vblank_reserved_time_us); + DML_LOG_VERBOSE("DML::%s: subvp_fw_processing_delay_us = %d\n", __func__, mode_lib->ip.subvp_fw_processing_delay_us); + DML_LOG_VERBOSE("DML::%s: subvp_pstate_allow_width_us = %d\n", __func__, mode_lib->ip.subvp_pstate_allow_width_us); + DML_LOG_VERBOSE("DML::%s: subvp_swath_height_margin_lines = %d\n", __func__, mode_lib->ip.subvp_swath_height_margin_lines); + DML_LOG_VERBOSE("DML::%s: vblank_reserved_time_us = %u\n", __func__, out->vblank_reserved_time_us); #endif } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h index 4e502f0a6d20..bdee6ad7bc59 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h @@ -1078,6 +1078,8 @@ struct dml2_core_calcs_mode_programming_locals { enum dml2_source_format_class pixel_format[DML2_MAX_PLANES]; unsigned int lb_source_lines_l[DML2_MAX_PLANES]; unsigned int lb_source_lines_c[DML2_MAX_PLANES]; + unsigned int num_dsc_slices[DML2_MAX_PLANES]; + bool dsc_enable[DML2_MAX_PLANES]; }; struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c index 2504d9c2ec34..7a220c0141c2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c @@ -82,7 +82,7 @@ bool dml2_core_utils_is_420(enum dml2_source_format_class source_format) val = 0; break; default: - DML2_ASSERT(0); + DML_ASSERT(0); break; } return val; @@ -145,7 +145,7 @@ bool dml2_core_utils_is_422_planar(enum dml2_source_format_class source_format) val = 0; break; default: - DML2_ASSERT(0); + DML_ASSERT(0); break; } return val; @@ -208,7 +208,7 @@ bool dml2_core_utils_is_422_packed(enum dml2_source_format_class source_format) val = 1; break; default: - DML2_ASSERT(0); + DML_ASSERT(0); break; } return val; @@ -216,104 +216,104 @@ bool dml2_core_utils_is_422_packed(enum dml2_source_format_class source_format) void dml2_core_utils_print_mode_support_info(const struct 
dml2_core_internal_mode_support_info *support, bool fail_only) { - dml2_printf("DML: ===================================== \n"); - dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n"); + DML_LOG_VERBOSE("DML: ===================================== \n"); + DML_LOG_VERBOSE("DML: DML_MODE_SUPPORT_INFO_ST\n"); if (!fail_only || support->ScaleRatioAndTapsSupport == 0) - dml2_printf("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); + DML_LOG_VERBOSE("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) - dml2_printf("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); + DML_LOG_VERBOSE("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); if (!fail_only || support->ViewportSizeSupport == 0) - dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); + DML_LOG_VERBOSE("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) - dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); + DML_LOG_VERBOSE("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) - dml2_printf("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); + DML_LOG_VERBOSE("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); if (!fail_only || support->BPPForMultistreamNotIndicated == 1) - dml2_printf("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); + DML_LOG_VERBOSE("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); if (!fail_only || support->MultistreamWithHDMIOreDP == 1) - dml2_printf("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + DML_LOG_VERBOSE("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); if (!fail_only || support->ExceededMultistreamSlots == 1) - dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + DML_LOG_VERBOSE("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) - dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); + DML_LOG_VERBOSE("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); if (!fail_only || support->NotEnoughLanesForMSO == 1) - dml2_printf("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); + DML_LOG_VERBOSE("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); if (!fail_only || support->P2IWith420 == 1) - dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); + DML_LOG_VERBOSE("DML: support: P2IWith420 = %d\n", support->P2IWith420); if (!fail_only || support->DSC422NativeNotSupported == 1) - dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); + DML_LOG_VERBOSE("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); if (!fail_only || support->DSCSlicesODMModeSupported == 0) - dml2_printf("DML: support: 
DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + DML_LOG_VERBOSE("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); if (!fail_only || support->NotEnoughDSCUnits == 1) - dml2_printf("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); + DML_LOG_VERBOSE("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); if (!fail_only || support->NotEnoughDSCSlices == 1) - dml2_printf("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); + DML_LOG_VERBOSE("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) - dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + DML_LOG_VERBOSE("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); + DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) - dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + DML_LOG_VERBOSE("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) - dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + DML_LOG_VERBOSE("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) - dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); + DML_LOG_VERBOSE("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); + DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); if (!fail_only || support->ROBSupport == 0) - dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport); + DML_LOG_VERBOSE("DML: support: ROBSupport = %d\n", support->ROBSupport); if (!fail_only || support->OutstandingRequestsSupport == 0) - dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); + DML_LOG_VERBOSE("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) - dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); + DML_LOG_VERBOSE("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) - dml2_printf("DML: support: DISPCLK_DPPCLK_Support = 
%d\n", support->DISPCLK_DPPCLK_Support); + DML_LOG_VERBOSE("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); if (!fail_only || support->TotalAvailablePipesSupport == 0) - dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + DML_LOG_VERBOSE("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); if (!fail_only || support->NumberOfOTGSupport == 0) - dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + DML_LOG_VERBOSE("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); if (!fail_only || support->NumberOfHDMIFRLSupport == 0) - dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); + DML_LOG_VERBOSE("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); if (!fail_only || support->NumberOfDP2p0Support == 0) - dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); + DML_LOG_VERBOSE("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); if (!fail_only || support->EnoughWritebackUnits == 0) - dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + DML_LOG_VERBOSE("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) - dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + DML_LOG_VERBOSE("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); if (!fail_only || support->WritebackLatencySupport == 0) - dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + DML_LOG_VERBOSE("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); if (!fail_only || support->CursorSupport == 0) - dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); + DML_LOG_VERBOSE("DML: support: CursorSupport = %d\n", support->CursorSupport); if (!fail_only || support->PitchSupport == 0) - dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); + DML_LOG_VERBOSE("DML: support: PitchSupport = %d\n", support->PitchSupport); if (!fail_only || support->ViewportExceedsSurface == 1) - dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); + DML_LOG_VERBOSE("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); if (!fail_only || support->PrefetchSupported == 0) - dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + DML_LOG_VERBOSE("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) - dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + DML_LOG_VERBOSE("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); if (!fail_only || support->AvgBandwidthSupport == 0) - dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + DML_LOG_VERBOSE("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); if (!fail_only || support->DynamicMetadataSupported == 0) - dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + DML_LOG_VERBOSE("DML: support: 
DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); if (!fail_only || support->VRatioInPrefetchSupported == 0) - dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + DML_LOG_VERBOSE("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); if (!fail_only || support->PTEBufferSizeNotExceeded == 0) - dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + DML_LOG_VERBOSE("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0) - dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); + DML_LOG_VERBOSE("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); if (!fail_only || support->ExceededMALLSize == 1) - dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + DML_LOG_VERBOSE("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); if (!fail_only || support->g6_temp_read_support == 0) - dml2_printf("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + DML_LOG_VERBOSE("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); if (!fail_only || support->ImmediateFlipSupport == 0) - dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); + DML_LOG_VERBOSE("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); if (!fail_only || support->LinkCapacitySupport == 0) - dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + DML_LOG_VERBOSE("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); if (!fail_only || support->ModeSupport == 0) - dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport); - dml2_printf("DML: ===================================== \n"); + DML_LOG_VERBOSE("DML: support: ModeSupport = %d\n", support->ModeSupport); + DML_LOG_VERBOSE("DML: ===================================== \n"); } const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) @@ -358,9 +358,9 @@ void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_di out_bpp[k] = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); - dml2_printf("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); - dml2_printf("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); + DML_LOG_VERBOSE("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); + DML_LOG_VERBOSE("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); + DML_LOG_VERBOSE("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); #endif } } @@ -391,7 +391,7 @@ unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); + DML_LOG_VERBOSE("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); #endif return num_active_pipes; } @@ -452,7 +452,7 @@ unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw else if (sw_mode == dml2_gfx11_sw_256kb_r_x) return 262144; else { - DML2_ASSERT(0); + DML_ASSERT(0); return 256; }; 
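
As an aside on the dml2_printf()/DML2_ASSERT() conversions above: the replacement DML_LOG_* macros (introduced later in this diff in dml2_debug.h) are gated by a compile-time log level, so verbose tracing compiles away entirely in normal builds. The sketch below is illustrative only, not the driver's actual header; it uses printf instead of dm_output_to_console and invented EX_LOG_* names, but shows the same gating pattern.

#include <stdio.h>

/* Invented names; mirrors the level-gating idea, not the real dml2_debug.h. */
#define EX_LOG_LEVEL_WARN    2
#define EX_LOG_LEVEL_VERBOSE 5

#ifndef EX_LOG_LEVEL
#define EX_LOG_LEVEL EX_LOG_LEVEL_WARN              /* default threshold */
#endif

#if EX_LOG_LEVEL >= EX_LOG_LEVEL_VERBOSE
#define EX_LOG_VERBOSE(fmt, ...) printf("[VERBOSE] " fmt, ##__VA_ARGS__)
#else
#define EX_LOG_VERBOSE(fmt, ...) ((void)0)          /* compiled out entirely */
#endif

int main(void)
{
	/* No output at the default WARN threshold; rebuild with
	 * -DEX_LOG_LEVEL=5 to see the message. */
	EX_LOG_VERBOSE("uclk_freq_khz = %ld\n", 97000L);
	return 0;
}

Gating at preprocessing time (rather than checking a runtime flag) is what lets the hot DML calculation paths keep their tracing calls without any cost in release builds.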
} @@ -498,8 +498,8 @@ int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode) sw_mode == dml2_gfx11_sw_256kb_r_x) version = 11; else { - dml2_printf("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); - DML2_ASSERT(0); + DML_LOG_VERBOSE("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); + DML_ASSERT(0); } return version; @@ -511,7 +511,7 @@ unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, co unsigned int index = 0; for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { - dml2_printf("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %d\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); + DML_LOG_VERBOSE("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %ld\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); if (i == 0) index = 0; @@ -524,8 +524,8 @@ unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, co } } #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: uclk_freq_khz = %d\n", __func__, uclk_freq_khz); - dml2_printf("DML::%s: index = %d\n", __func__, index); + DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, index); #endif return index; } @@ -533,32 +533,32 @@ unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, co unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table) { unsigned int i; - bool clk_entry_found = 0; + bool clk_entry_found = false; for (i = 0; i < clk_table->uclk.num_clk_values; i++) { - dml2_printf("DML::%s: clk_table.uclk.clk_values_khz[%d] = %d\n", __func__, i, clk_table->uclk.clk_values_khz[i]); + DML_LOG_VERBOSE("DML::%s: clk_table.uclk.clk_values_khz[%d] = %ld\n", __func__, i, clk_table->uclk.clk_values_khz[i]); if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) { - clk_entry_found = 1; + clk_entry_found = true; break; } } if (!clk_entry_found) - DML2_ASSERT(clk_entry_found); + DML_ASSERT(clk_entry_found); #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); - dml2_printf("DML::%s: index = %d\n", __func__, i); + DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, i); #endif return i; } bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format) { - bool ret_val = 0; + bool ret_val = false; if (dml2_core_utils_is_420(source_format) || dml2_core_utils_is_422_planar(source_format) || (source_format == dml2_rgbe_alpha)) - ret_val = 1; + ret_val = true; return ret_val; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c index 15507926f3a4..f486b090bbfc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -754,6 +754,8 @@ bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_ dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * 
refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_enter_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_exit_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); @@ -768,6 +770,8 @@ bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_ dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_enter_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_exit_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c index f4b1a7d02d42..a265f254152c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c @@ -182,6 +182,10 @@ static bool build_min_clock_table(const struct dml2_soc_bb *soc_bb, struct dml2_ min_table->max_clocks_khz.dtbclk = soc_bb->clk_table.dtbclk.clk_values_khz[soc_bb->clk_table.dtbclk.num_clk_values - 1]; min_table->max_clocks_khz.phyclk = soc_bb->clk_table.phyclk.clk_values_khz[soc_bb->clk_table.phyclk.num_clk_values - 1]; + min_table->max_ss_clocks_khz.dispclk = (unsigned int)((double)min_table->max_clocks_khz.dispclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0)); + min_table->max_ss_clocks_khz.dppclk = (unsigned int)((double)min_table->max_clocks_khz.dppclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0)); + min_table->max_ss_clocks_khz.dtbclk = (unsigned int)((double)min_table->max_clocks_khz.dtbclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0)); + min_table->max_clocks_khz.dcfclk = soc_bb->clk_table.dcfclk.clk_values_khz[soc_bb->clk_table.dcfclk.num_clk_values - 1]; min_table->max_clocks_khz.fclk = soc_bb->clk_table.fclk.clk_values_khz[soc_bb->clk_table.fclk.num_clk_values - 1]; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c 
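
The new max_ss_clocks_khz entries in build_min_clock_table() above derive a spread-spectrum-aware ceiling from each table maximum by dividing out the configured downspread. A small worked example of that arithmetic follows; the clock and downspread values are made up for illustration (the driver reads them from soc_bb->clk_table and soc_bb->dcn_downspread_percent).

#include <stdio.h>

int main(void)
{
	unsigned int max_dispclk_khz = 1200000;   /* hypothetical table maximum */
	double dcn_downspread_percent = 0.5;      /* hypothetical SoC downspread */

	/* Same form as the new assignments: ceiling = max / (1 + downspread/100) */
	unsigned int max_ss_dispclk_khz = (unsigned int)
		((double)max_dispclk_khz / (1.0 + dcn_downspread_percent / 100.0));

	printf("max_ss_dispclk = %u kHz\n", max_ss_dispclk_khz);  /* 1194029 */
	return 0;
}
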
b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index f50662b83296..d88b3e0082dd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -659,7 +659,7 @@ bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) for (i = 1; i <= PMO_DCN4_MAX_DISPLAYS; i++) { switch (i) { case 1: - DML2_ASSERT(base_strategy_list_1_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + DML_ASSERT(base_strategy_list_1_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ pmo_dcn4_fams2_expand_base_pstate_strategies( @@ -670,7 +670,7 @@ bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); break; case 2: - DML2_ASSERT(base_strategy_list_2_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + DML_ASSERT(base_strategy_list_2_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ pmo_dcn4_fams2_expand_base_pstate_strategies( @@ -681,7 +681,7 @@ bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); break; case 3: - DML2_ASSERT(base_strategy_list_3_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + DML_ASSERT(base_strategy_list_3_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ pmo_dcn4_fams2_expand_base_pstate_strategies( @@ -692,7 +692,7 @@ bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); break; case 4: - DML2_ASSERT(base_strategy_list_4_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + DML_ASSERT(base_strategy_list_4_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ pmo_dcn4_fams2_expand_base_pstate_strategies( diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c index dc2ce5e77f57..4a7c4c62111e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c @@ -761,7 +761,7 @@ bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache total_mcaches_required--; } } - dml2_printf("DML_CORE_DCN3::%s: plane_%d, total_mcaches_required=%d\n", __func__, i, total_mcaches_required); + DML_LOG_VERBOSE("DML_CORE_DCN3::%s: plane_%d, total_mcaches_required=%d\n", __func__, i, total_mcaches_required); if (total_mcaches_required > dml->soc_bbox.num_dcc_mcaches) { result = false; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c deleted file mode 100644 index c506667897c4..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c +++ /dev/null @@ -1,31 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#include "dml2_debug.h" - -int dml2_log_internal(const char *format, ...) -{ - return 0; -} - -int dml2_printf(const char *format, ...) 
-{ -#ifdef _DEBUG -#ifdef _DEBUG_PRINTS - int result; - va_list args; - va_start(args, format); - - result = vprintf(format, args); - - va_end(args); - - return result; -#else - return 0; -#endif -#else - return 0; -#endif -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h index bfe6f236d2e4..b226225103c3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h @@ -5,55 +5,62 @@ #ifndef __DML2_DEBUG_H__ #define __DML2_DEBUG_H__ -#ifndef DML2_ASSERT -#define DML2_ASSERT(condition) ((void)0) -#endif +#include "os_types.h" +#define DML_ASSERT(condition) ASSERT(condition) +#define DML_LOG_LEVEL_DEFAULT DML_LOG_LEVEL_WARN +#define DML_LOG_INTERNAL(fmt, ...) dm_output_to_console(fmt, ## __VA_ARGS__) -/* - * DML_LOG_FATAL - fatal errors for unrecoverable DML states until a restart. - * DML_LOG_ERROR - unexpected but recoverable failures inside DML - * DML_LOG_WARN - unexpected inputs or events to DML - * DML_LOG_INFO - high level tracing of DML interfaces - * DML_LOG_DEBUG - detailed tracing of DML internal components - * DML_LOG_VERBOSE - detailed tracing of DML calculation procedure - */ -#if !defined(DML_LOG_LEVEL) -#if defined(_DEBUG) && defined(_DEBUG_PRINTS) -/* for backward compatibility with old macros */ -#define DML_LOG_LEVEL 5 -#else -#define DML_LOG_LEVEL 0 -#endif -#endif +/* ASSERT with message output */ +#define DML_ASSERT_MSG(condition, fmt, ...) \ + do { \ + if (!(condition)) { \ + DML_LOG_ERROR("DML ASSERT hit in %s line %d\n", __func__, __LINE__); \ + DML_LOG_ERROR(fmt, ## __VA_ARGS__); \ + DML_ASSERT(condition); \ + } \ + } while (0) + +/* fatal errors for unrecoverable DML states until a full reset */ +#define DML_LOG_LEVEL_FATAL 0 +/* unexpected but recoverable failures inside DML */ +#define DML_LOG_LEVEL_ERROR 1 +/* unexpected inputs or events to DML */ +#define DML_LOG_LEVEL_WARN 2 +/* high level tracing of DML interfaces */ +#define DML_LOG_LEVEL_INFO 3 +/* detailed tracing of DML internal components */ +#define DML_LOG_LEVEL_DEBUG 4 +/* detailed tracing of DML calculation procedure */ +#define DML_LOG_LEVEL_VERBOSE 5 -#define DML_LOG_FATAL(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) -#if DML_LOG_LEVEL >= 1 -#define DML_LOG_ERROR(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#ifndef DML_LOG_LEVEL +#define DML_LOG_LEVEL DML_LOG_LEVEL_DEFAULT +#endif /* #ifndef DML_LOG_LEVEL */ + +#define DML_LOG_FATAL(fmt, ...) DML_LOG_INTERNAL("[DML FATAL] " fmt, ## __VA_ARGS__) +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_ERROR +#define DML_LOG_ERROR(fmt, ...) DML_LOG_INTERNAL("[DML ERROR] "fmt, ## __VA_ARGS__) #else #define DML_LOG_ERROR(fmt, ...) ((void)0) #endif -#if DML_LOG_LEVEL >= 2 -#define DML_LOG_WARN(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_WARN +#define DML_LOG_WARN(fmt, ...) DML_LOG_INTERNAL("[DML WARN] "fmt, ## __VA_ARGS__) #else #define DML_LOG_WARN(fmt, ...) ((void)0) #endif -#if DML_LOG_LEVEL >= 3 -#define DML_LOG_INFO(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_INFO +#define DML_LOG_INFO(fmt, ...) DML_LOG_INTERNAL("[DML INFO] "fmt, ## __VA_ARGS__) #else #define DML_LOG_INFO(fmt, ...) ((void)0) #endif -#if DML_LOG_LEVEL >= 4 -#define DML_LOG_DEBUG(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_DEBUG +#define DML_LOG_DEBUG(fmt, ...) 
DML_LOG_INTERNAL("[DML DEBUG] "fmt, ## __VA_ARGS__) #else #define DML_LOG_DEBUG(fmt, ...) ((void)0) #endif -#if DML_LOG_LEVEL >= 5 -#define DML_LOG_VERBOSE(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_VERBOSE +#define DML_LOG_VERBOSE(fmt, ...) DML_LOG_INTERNAL("[DML VERBOSE] "fmt, ## __VA_ARGS__) #else #define DML_LOG_VERBOSE(fmt, ...) ((void)0) #endif - -int dml2_log_internal(const char *format, ...); -int dml2_printf(const char *format, ...); - -#endif +#endif /* __DML2_DEBUG_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h index d8d01dceacdd..00688b9f1df4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h @@ -38,6 +38,12 @@ struct dml2_mcg_min_clock_table { } max_clocks_khz; struct { + unsigned int dispclk; + unsigned int dppclk; + unsigned int dtbclk; + } max_ss_clocks_khz; + + struct { unsigned int dprefclk; unsigned int xtalclk; unsigned int pcierefclk; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c index a966abd40788..5f1b49a50049 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c @@ -1082,22 +1082,22 @@ bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const s if (stream_disp_cfg_index >= disp_cfg_index_max) continue; - if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_bypass) { - scratch.odm_info.odm_factor = 1; - } else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_2to1) { - scratch.odm_info.odm_factor = 2; - } else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_4to1) { - scratch.odm_info.odm_factor = 4; - } else { - ASSERT(false); - scratch.odm_info.odm_factor = 1; - } - + if (ctx->architecture == dml2_architecture_20) { + if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_bypass) { + scratch.odm_info.odm_factor = 1; + } else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_2to1) { + scratch.odm_info.odm_factor = 2; + } else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_4to1) { + scratch.odm_info.odm_factor = 4; + } else { + ASSERT(false); + scratch.odm_info.odm_factor = 1; + } + } else if (ctx->architecture == dml2_architecture_21) { /* After DML2.1 update, ODM interpretation needs to change and is no longer same as for DML2.0. * This is not an issue with new resource management logic. This block ensure backcompat * with legacy pipe management with updated DML. 
* */ - if (ctx->architecture == dml2_architecture_21) { if (ODMMode[stream_disp_cfg_index] == 1) { scratch.odm_info.odm_factor = 1; } else if (ODMMode[stream_disp_cfg_index] == 2) { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 2061d43b92e1..5de775fd8fce 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -896,7 +896,7 @@ static void populate_dummy_dml_surface_cfg(struct dml_surface_cfg_st *out, unsig out->SurfaceWidthC[location] = in->timing.h_addressable; out->SurfaceHeightC[location] = in->timing.v_addressable; out->PitchY[location] = ((out->SurfaceWidthY[location] + 127) / 128) * 128; - out->PitchC[location] = 0; + out->PitchC[location] = 1; out->DCCEnable[location] = false; out->DCCMetaPitchY[location] = 0; out->DCCMetaPitchC[location] = 0; @@ -973,7 +973,9 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p } } -static void get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc_state *context, struct scaler_data *out) +static struct scaler_data *get_scaler_data_for_plane( + const struct dc_plane_state *in, + struct dc_state *context) { int i; struct pipe_ctx *temp_pipe = &context->res_ctx.temp_pipe; @@ -994,7 +996,7 @@ static void get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc } ASSERT(i < MAX_PIPES); - memcpy(out, &temp_pipe->plane_res.scl_data, sizeof(*out)); + return &temp_pipe->plane_res.scl_data; } static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, @@ -1057,11 +1059,7 @@ static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out const struct dc_plane_state *in, struct dc_state *context, const struct soc_bounding_box_st *soc) { - struct scaler_data *scaler_data = kzalloc(sizeof(*scaler_data), GFP_KERNEL); - if (!scaler_data) - return; - - get_scaler_data_for_plane(in, context, scaler_data); + struct scaler_data *scaler_data = get_scaler_data_for_plane(in, context); out->CursorBPP[location] = dml_cur_32bit; out->CursorWidth[location] = 256; @@ -1126,8 +1124,6 @@ static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out out->DynamicMetadataTransmittedBytes[location] = 0; out->NumberOfCursors[location] = 1; - - kfree(scaler_data); } static unsigned int map_stream_to_dml_display_cfg(const struct dml2_context *dml2, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index e89571874185..525b7d04bf84 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -663,7 +663,10 @@ static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_s dml2_copy_clocks_to_dc_state(&out_clks, context); dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.a, &dml2->v20.dml_core_ctx); dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.b, &dml2->v20.dml_core_ctx); - memcpy(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.g6_temp_read_watermark_set, sizeof(context->bw_ctx.bw.dcn.watermarks.c)); + if (context->streams[0]->sink->link->dc->caps.is_apu) + dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.dml_core_ctx); + else + memcpy(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.g6_temp_read_watermark_set, sizeof(context->bw_ctx.bw.dcn.watermarks.c)); 
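
The get_scaler_data_for_plane() change above drops the kzalloc()/memcpy()/kfree() round trip and instead hands back a pointer into the dc_state's temp_pipe, so the validation path no longer allocates. Below is a simplified sketch of that pattern under stand-in type names (the real structures are pipe_ctx, dc_state and scaler_data); it is not the driver code itself.

#include <string.h>

/* Stand-in types purely for illustration. */
struct scaler_data { int h_taps, v_taps; };
struct temp_pipe   { struct scaler_data scl_data; };
struct state       { struct temp_pipe temp_pipe; };

/* Old pattern: caller allocates a scaler_data and the helper memcpy()s into
 * it.  New pattern: compute into state-owned scratch storage and return a
 * pointer, valid for as long as the state itself is. */
static struct scaler_data *get_scaler_data(struct state *st)
{
	struct temp_pipe *tmp = &st->temp_pipe;

	memset(tmp, 0, sizeof(*tmp));
	tmp->scl_data.h_taps = 4;	/* ...recomputed scaling parameters... */
	tmp->scl_data.v_taps = 4;
	return &tmp->scl_data;
}

int main(void)
{
	struct state st;
	struct scaler_data *sd = get_scaler_data(&st);

	return sd->h_taps == 4 ? 0 : 1;
}
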
dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.d, &dml2->v20.dml_core_ctx); dml2_extract_writeback_wm(context, &dml2->v20.dml_core_ctx); //copy for deciding zstate use diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c index abf439e743f2..2d70586cef40 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c @@ -790,8 +790,7 @@ static bool dpp3_program_blnd_lut(struct dpp *dpp_base, if (params == NULL) { REG_SET(CM_BLNDGAM_CONTROL, 0, CM_BLNDGAM_MODE, 0); - if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.cm) - dpp3_power_on_blnd_lut(dpp_base, false); + dpp3_power_on_blnd_lut(dpp_base, false); return false; } @@ -1204,8 +1203,7 @@ static bool dpp3_program_shaper(struct dpp *dpp_base, if (params == NULL) { REG_SET(CM_SHAPER_CONTROL, 0, CM_SHAPER_LUT_MODE, 0); - if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.cm) - dpp3_power_on_shaper(dpp_base, false); + dpp3_power_on_shaper(dpp_base, false); return false; } @@ -1399,8 +1397,7 @@ static bool dpp3_program_3dlut(struct dpp *dpp_base, if (params == NULL) { dpp3_set_3dlut_mode(dpp_base, LUT_BYPASS, false, false); - if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.cm) - dpp3_power_on_hdr3dlut(dpp_base, false); + dpp3_power_on_hdr3dlut(dpp_base, false); return false; } diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c index 62b7012cda43..f7a373a3d70a 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c @@ -138,7 +138,7 @@ bool dpp35_construct( dpp->base.funcs = &dcn35_dpp_funcs; // w/a for cursor memory stuck in LS by programming DISPCLK_R_GATE_DISABLE, limit w/a to some ASIC revs - if (dpp->base.ctx->asic_id.hw_internal_rev <= 0x10) + if (dpp->base.ctx->asic_id.hw_internal_rev < 0x40) dpp->dispclk_r_gate_disable = true; return ret; } diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c index 75128fd34306..bd1b9aef6d5c 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c @@ -57,13 +57,6 @@ static const struct dsc_funcs dcn20_dsc_funcs = { #define DC_LOGGER \ dsc->ctx->logger -enum dsc_bits_per_comp { - DSC_BPC_8 = 8, - DSC_BPC_10 = 10, - DSC_BPC_12 = 12, - DSC_BPC_UNKNOWN -}; - /* API functions (external or via structure->function_pointer) */ void dsc2_construct(struct dcn20_dsc *dsc, diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h index 1fb90b52b814..a9c04fc95bd1 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h @@ -457,6 +457,12 @@ type DSCRM_DSC_DOUBLE_BUFFER_REG_UPDATE_PENDING; \ type DSCRM_DSC_FORWARD_EN_STATUS +enum dsc_bits_per_comp { + DSC_BPC_8 = 8, + DSC_BPC_10 = 10, + DSC_BPC_12 = 12, + DSC_BPC_UNKNOWN +}; struct dcn20_dsc_registers { uint32_t DSC_TOP_CONTROL; diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c index 4893b793fec0..4222679fd4c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c @@ -45,12 +45,6 @@ static const struct dsc_funcs dcn401_dsc_funcs = { #define DC_LOGGER \ 
dsc->ctx->logger -enum dsc_bits_per_comp { - DSC_BPC_8 = 8, - DSC_BPC_10 = 10, - DSC_BPC_12 = 12, - DSC_BPC_UNKNOWN -}; /* API functions (external or via structure->function_pointer) */ diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c index b099989d9364..942d9f0b6df2 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c @@ -411,6 +411,20 @@ enum dc_irq_source dal_irq_get_rx_source( } } +enum dc_irq_source dal_irq_get_read_request( + const struct gpio *irq) +{ + enum gpio_id id = dal_gpio_get_id(irq); + + switch (id) { + case GPIO_ID_HPD: + return (enum dc_irq_source)(DC_IRQ_SOURCE_DCI2C_RR_DDC1 + + dal_gpio_get_enum(irq)); + default: + return DC_IRQ_SOURCE_INVALID; + } +} + enum gpio_result dal_irq_setup_hpd_filter( struct gpio *irq, struct gpio_hpd_config *config) diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.c index 2546224b326a..e4496ad203b2 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.c @@ -132,9 +132,9 @@ int hubbub21_init_dchub(struct hubbub *hubbub, // Init VMID 0 based on PA config dcn20_vmid_setup(&hubbub1->vmid[0], &phys_config); } - - dcn21_dchvm_init(hubbub); - + if (!hubbub1->base.ctx->dc->config.skip_riommu_prefetch_wa) { + dcn21_dchvm_init(hubbub); + } return hubbub1->num_vmid; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/Makefile b/drivers/gpu/drm/amd/display/dc/hwss/Makefile index 40ecebea1ba0..bee617ca0838 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/Makefile +++ b/drivers/gpu/drm/amd/display/dc/hwss/Makefile @@ -27,6 +27,24 @@ # DCE ############################################################################### +ifdef CONFIG_DRM_AMD_DC_SI +HWSS_DCE60 = dce60_hwseq.o + +AMD_DAL_HWSS_DCE60 = $(addprefix $(AMDDALPATH)/dc/hwss/dce60/,$(HWSS_DCE60)) + +AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCE60) +endif + +############################################################################### + +HWSS_DCE80 = dce80_hwseq.o + +AMD_DAL_HWSS_DCE80 = $(addprefix $(AMDDALPATH)/dc/hwss/dce80/,$(HWSS_DCE80)) + +AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCE80) + +############################################################################### + HWSS_DCE = dce_hwseq.o AMD_DAL_HWSS_DCE = $(addprefix $(AMDDALPATH)/dc/hwss/dce/,$(HWSS_DCE)) @@ -65,14 +83,6 @@ AMD_DAL_HWSS_DCE120 = $(addprefix $(AMDDALPATH)/dc/hwss/dce120/,$(HWSS_DCE120)) AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCE120) -############################################################################### - -HWSS_DCE80 = dce80_hwseq.o - -AMD_DAL_HWSS_DCE80 = $(addprefix $(AMDDALPATH)/dc/hwss/dce80/,$(HWSS_DCE80)) - -AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCE80) - ifdef CONFIG_DRM_AMD_DC_FP ############################################################################### # DCN diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 5656d10368ad..23bec5d25ed6 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -2763,12 +2763,12 @@ static void dce110_enable_per_frame_crtc_position_reset( } -static void init_pipes(struct dc *dc, struct dc_state *context) +static void dce110_init_pipes(struct dc *dc, struct dc_state *context) { // Do nothing } -static void init_hw(struct dc *dc) 
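
On the new dal_irq_get_read_request() added to gpio_service.c above: like the existing dal_irq_get_rx_source(), it maps an HPD line onto a contiguous block of IRQ sources, so the per-line value is simply a block base plus the GPIO's enum index. The toy sketch below uses invented enum values and a simplified signature just to show that offsetting pattern.

#include <stdio.h>

/* Invented values purely for illustration. */
enum irq_source { IRQ_SOURCE_INVALID = 0, IRQ_SOURCE_RR_DDC1 = 10 };
enum gpio_id    { GPIO_ID_HPD, GPIO_ID_GENERIC };

static enum irq_source read_request_source(enum gpio_id id, unsigned int line)
{
	switch (id) {
	case GPIO_ID_HPD:
		/* contiguous block: DDC1, DDC2, ... indexed by the HPD line */
		return (enum irq_source)(IRQ_SOURCE_RR_DDC1 + line);
	default:
		return IRQ_SOURCE_INVALID;
	}
}

int main(void)
{
	printf("%d\n", read_request_source(GPIO_ID_HPD, 2));	/* prints 12 */
	return 0;
}
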
+static void dce110_init_hw(struct dc *dc) { int i; struct dc_bios *bp; @@ -3327,7 +3327,7 @@ void dce110_disable_link_output(struct dc_link *link, static const struct hw_sequencer_funcs dce110_funcs = { .program_gamut_remap = program_gamut_remap, .program_output_csc = program_output_csc, - .init_hw = init_hw, + .init_hw = dce110_init_hw, .apply_ctx_to_hw = dce110_apply_ctx_to_hw, .apply_ctx_for_surface = dce110_apply_ctx_for_surface, .post_unlock_program_front_end = dce110_post_unlock_program_front_end, @@ -3371,7 +3371,7 @@ static const struct hw_sequencer_funcs dce110_funcs = { }; static const struct hwseq_private_funcs dce110_private_funcs = { - .init_pipes = init_pipes, + .init_pipes = dce110_init_pipes, .set_input_transfer_func = dce110_set_input_transfer_func, .set_output_transfer_func = dce110_set_output_transfer_func, .power_down = dce110_power_down, diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.c index 44b56490e152..a08e9f9eec17 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.c @@ -26,7 +26,7 @@ #include "dm_services.h" #include "dc.h" #include "core_types.h" -#include "dce60_hw_sequencer.h" +#include "dce60_hwseq.h" #include "dce/dce_hwseq.h" #include "dce110/dce110_hwseq.h" diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.h index f3b2d8b60d5b..f3b2d8b60d5b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.h diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 912f96323ed6..f9ee55998b6b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -94,6 +94,128 @@ static void print_microsec(struct dc_context *dc_ctx, us_x10 % frac); } +/* + * Delay until we passed busy-until-point to which we can + * do necessary locking/programming on consecutive full updates + */ +void dcn10_wait_for_pipe_update_if_needed(struct dc *dc, struct pipe_ctx *pipe_ctx, bool is_surface_update_only) +{ + struct crtc_position position; + struct dc_stream_state *stream = pipe_ctx->stream; + unsigned int vpos, frame_count; + uint32_t vupdate_start, vupdate_end, vblank_start; + unsigned int lines_to_vupdate, us_to_vupdate; + unsigned int us_per_line, us_vupdate; + + if (!pipe_ctx->stream || + !pipe_ctx->stream_res.tg || + !pipe_ctx->stream_res.stream_enc) + return; + + if (pipe_ctx->prev_odm_pipe && + pipe_ctx->stream) + return; + + if (!pipe_ctx->wait_is_required) + return; + + struct timing_generator *tg = pipe_ctx->stream_res.tg; + + if (tg->funcs->is_tg_enabled && !tg->funcs->is_tg_enabled(tg)) + return; + + dc->hwss.calc_vupdate_position(dc, pipe_ctx, &vupdate_start, + &vupdate_end); + + dc->hwss.get_position(&pipe_ctx, 1, &position); + vpos = position.vertical_count; + + frame_count = tg->funcs->get_frame_count(tg); + + if (frame_count - pipe_ctx->wait_frame_count > 2) + return; + + vblank_start = pipe_ctx->pipe_dlg_param.vblank_start; + + if (vpos >= vupdate_start && vupdate_start >= vblank_start) + lines_to_vupdate = stream->timing.v_total - vpos + vupdate_start; + else + lines_to_vupdate = vupdate_start - vpos; + + us_per_line = + stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz; + us_to_vupdate = lines_to_vupdate * 
us_per_line; + + if (vupdate_end < vupdate_start) + vupdate_end += stream->timing.v_total; + + if (lines_to_vupdate > stream->timing.v_total - vupdate_end + vupdate_start) + us_to_vupdate = 0; + + us_vupdate = (vupdate_end - vupdate_start + 1) * us_per_line; + + if (is_surface_update_only && us_to_vupdate + us_vupdate > 200) { + //surface updates come in at high irql + pipe_ctx->wait_is_required = true; + return; + } + + fsleep(us_to_vupdate + us_vupdate); + + //clear + pipe_ctx->next_vupdate = 0; + pipe_ctx->wait_frame_count = 0; + pipe_ctx->wait_is_required = false; +} + +/* + * On pipe unlock and programming, indicate pipe will be busy + * until some frame and line (vupdate), this is required for consecutive + * full updates, need to wait for updates + * to latch to try and program the next update + */ +void dcn10_set_wait_for_update_needed_for_pipe(struct dc *dc, struct pipe_ctx *pipe_ctx) +{ + uint32_t vupdate_start, vupdate_end; + struct crtc_position position; + unsigned int vpos, cur_frame; + + if (!pipe_ctx->stream || + !pipe_ctx->stream_res.tg || + !pipe_ctx->stream_res.stream_enc) + return; + + dc->hwss.get_position(&pipe_ctx, 1, &position); + vpos = position.vertical_count; + + dc->hwss.calc_vupdate_position(dc, pipe_ctx, &vupdate_start, + &vupdate_end); + + struct timing_generator *tg = pipe_ctx->stream_res.tg; + + struct optc *optc1 = DCN10TG_FROM_TG(tg); + + ASSERT(optc1->max_frame_count != 0); + + if (tg->funcs->is_tg_enabled && !tg->funcs->is_tg_enabled(tg)) + return; + + pipe_ctx->next_vupdate = vupdate_start; + + cur_frame = tg->funcs->get_frame_count(tg); + + if (vpos < vupdate_start) { + pipe_ctx->wait_frame_count = cur_frame; + } else { + if (cur_frame + 1 > optc1->max_frame_count) + pipe_ctx->wait_frame_count = cur_frame + 1 - optc1->max_frame_count; + else + pipe_ctx->wait_frame_count = cur_frame + 1; + } + + pipe_ctx->wait_is_required = true; +} + void dcn10_lock_all_pipes(struct dc *dc, struct dc_state *context, bool lock) @@ -2664,7 +2786,6 @@ void dcn10_update_visual_confirm_color(struct dc *dc, struct mpc *mpc = dc->res_pool->mpc; if (mpc->funcs->set_bg_color) { - memcpy(&pipe_ctx->plane_state->visual_confirm_color, &(pipe_ctx->visual_confirm_color), sizeof(struct tg_color)); mpc->funcs->set_bg_color(mpc, &(pipe_ctx->visual_confirm_color), mpcc_id); } } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h index 42ffd1e1299c..57d30ea225f2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h @@ -50,6 +50,13 @@ void dcn10_optimize_bandwidth( void dcn10_prepare_bandwidth( struct dc *dc, struct dc_state *context); +void dcn10_wait_for_pipe_update_if_needed( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + bool is_surface_update_only); +void dcn10_set_wait_for_update_needed_for_pipe( + struct dc *dc, + struct pipe_ctx *pipe_ctx); void dcn10_pipe_control_lock( struct dc *dc, struct pipe_ctx *pipe, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 846c9c51f2d9..5e78b553adbd 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -2482,7 +2482,7 @@ bool dcn20_update_bandwidth( struct dce_hwseq *hws = dc->hwseq; /* recalculate DML parameters */ - if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) + if (dc->res_pool->funcs->validate_bandwidth(dc, 
context, false) != DC_OK) return false; /* apply updated bandwidth parameters */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c index be26c925fdfa..e68f21fd5f0f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c @@ -84,6 +84,20 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) struct dsc_config dsc_cfg; struct dsc_optc_config dsc_optc_cfg = {0}; enum optc_dsc_mode optc_dsc_mode; + struct dcn_dsc_state dsc_state = {0}; + + if (!dsc) { + DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst); + return; + } + + if (dsc->funcs->dsc_read_state) { + dsc->funcs->dsc_read_state(dsc, &dsc_state); + if (!dsc_state.dsc_fw_en) { + DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst); + return; + } + } /* Enable DSC hw block */ dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index cd0adf72b223..a0b05b9ef660 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -1181,6 +1181,7 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign struct dc_stream_state *stream = pipe_ctx->stream; unsigned int odm_combine_factor = 0; bool two_pix_per_container = false; + struct dce_hwseq *hws = stream->ctx->dc->hwseq; two_pix_per_container = pipe_ctx->stream_res.tg->funcs->is_two_pixels_per_container(&stream->timing); odm_combine_factor = get_odm_config(pipe_ctx, NULL); @@ -1201,7 +1202,8 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign } else { *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_4; - if ((odm_combine_factor == 2) || dcn32_is_dp_dig_pixel_rate_div_policy(pipe_ctx)) + if ((odm_combine_factor == 2) || (hws->funcs.is_dp_dig_pixel_rate_div_policy && + hws->funcs.is_dp_dig_pixel_rate_div_policy(pipe_ctx))) *k2_div = PIXEL_RATE_DIV_BY_2; } } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 922b8d71cf1a..63077c1fad85 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -241,11 +241,6 @@ void dcn35_init_hw(struct dc *dc) dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter); } - if (res_pool->dccg->funcs->dccg_root_gate_disable_control) { - for (i = 0; i < res_pool->pipe_count; i++) - res_pool->dccg->funcs->dccg_root_gate_disable_control(res_pool->dccg, i, 0); - } - for (i = 0; i < res_pool->audio_count; i++) { struct audio *audio = res_pool->audios[i]; @@ -901,12 +896,18 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context) void dcn35_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_state *context) { + struct dpp *dpp = pipe_ctx->plane_res.dpp; + struct dccg *dccg = dc->res_pool->dccg; + + /* enable DCFCLK current DCHUB */ pipe_ctx->plane_res.hubp->funcs->hubp_clk_cntl(pipe_ctx->plane_res.hubp, true); /* initialize HUBP on power up */ pipe_ctx->plane_res.hubp->funcs->hubp_init(pipe_ctx->plane_res.hubp); - + /*make sure DPPCLK is on*/ + 
dccg->funcs->dccg_root_gate_disable_control(dccg, dpp->inst, true); + dpp->funcs->dpp_dppclk_control(dpp, false, true); /* make sure OPP_PIPE_CLOCK_EN = 1 */ pipe_ctx->stream_res.opp->funcs->opp_pipe_clock_control( pipe_ctx->stream_res.opp, @@ -923,6 +924,7 @@ void dcn35_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx, // Program system aperture settings pipe_ctx->plane_res.hubp->funcs->hubp_set_vm_system_aperture_settings(pipe_ctx->plane_res.hubp, &apt); } + //DC_LOG_DEBUG("%s: dpp_inst(%d) =\n", __func__, dpp->inst); if (!pipe_ctx->top_pipe && pipe_ctx->plane_state @@ -938,6 +940,8 @@ void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) { struct hubp *hubp = pipe_ctx->plane_res.hubp; struct dpp *dpp = pipe_ctx->plane_res.dpp; + struct dccg *dccg = dc->res_pool->dccg; + dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, pipe_ctx); @@ -955,7 +959,8 @@ void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) hubp->funcs->hubp_clk_cntl(hubp, false); dpp->funcs->dpp_dppclk_control(dpp, false, false); -/*to do, need to support both case*/ + dccg->funcs->dccg_root_gate_disable_control(dccg, dpp->inst, false); + hubp->power_gated = true; hubp->funcs->hubp_reset(hubp); @@ -967,6 +972,8 @@ void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) pipe_ctx->top_pipe = NULL; pipe_ctx->bottom_pipe = NULL; pipe_ctx->plane_state = NULL; + //DC_LOG_DEBUG("%s: dpp_inst(%d)=\n", __func__, dpp->inst); + } void dcn35_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index 6a82a865209c..a3ccf805bd16 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -168,6 +168,8 @@ static const struct hwseq_private_funcs dcn35_private_funcs = { .dsc_pg_control = dcn35_dsc_pg_control, .dsc_pg_status = dcn32_dsc_pg_status, .enable_plane = dcn35_enable_plane, + .wait_for_pipe_update_if_needed = dcn10_wait_for_pipe_update_if_needed, + .set_wait_for_update_needed_for_pipe = dcn10_set_wait_for_update_needed_for_pipe, }; void dcn35_hw_sequencer_construct(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index 902a96940a01..58f2be2a326b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -158,10 +158,12 @@ static const struct hwseq_private_funcs dcn351_private_funcs = { .set_mcm_luts = dcn32_set_mcm_luts, .setup_hpo_hw_control = dcn35_setup_hpo_hw_control, .calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values, - .is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy, + .is_dp_dig_pixel_rate_div_policy = dcn35_is_dp_dig_pixel_rate_div_policy, .dsc_pg_control = dcn35_dsc_pg_control, .dsc_pg_status = dcn32_dsc_pg_status, .enable_plane = dcn35_enable_plane, + .wait_for_pipe_update_if_needed = dcn10_wait_for_pipe_update_if_needed, + .set_wait_for_update_needed_for_pipe = dcn10_set_wait_for_update_needed_for_pipe, }; void dcn351_hw_sequencer_construct(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 5489f3d431f6..ae7194da5987 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -396,6 +396,249 @@ static void dcn401_get_mcm_lut_xable_from_pipe_ctx(struct dc *dc, struct pipe_ct } } +static void dcn401_set_mcm_location_post_blend(struct dc *dc, struct pipe_ctx *pipe_ctx, bool bPostBlend) +{ + struct mpc *mpc = dc->res_pool->mpc; + int mpcc_id = pipe_ctx->plane_res.hubp->inst; + + if (!pipe_ctx->plane_state) + return; + + mpc->funcs->set_movable_cm_location(mpc, MPCC_MOVABLE_CM_LOCATION_BEFORE, mpcc_id); + pipe_ctx->plane_state->mcm_location = (bPostBlend) ? + MPCC_MOVABLE_CM_LOCATION_AFTER : + MPCC_MOVABLE_CM_LOCATION_BEFORE; +} + +static void dc_get_lut_mode( + enum dc_cm2_gpu_mem_layout layout, + enum hubp_3dlut_fl_mode *mode, + enum hubp_3dlut_fl_addressing_mode *addr_mode) +{ + switch (layout) { + case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB: + *mode = hubp_3dlut_fl_mode_native_1; + *addr_mode = hubp_3dlut_fl_addressing_mode_sw_linear; + break; + case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_BGR: + *mode = hubp_3dlut_fl_mode_native_2; + *addr_mode = hubp_3dlut_fl_addressing_mode_sw_linear; + break; + case DC_CM2_GPU_MEM_LAYOUT_1D_PACKED_LINEAR: + *mode = hubp_3dlut_fl_mode_transform; + *addr_mode = hubp_3dlut_fl_addressing_mode_simple_linear; + break; + default: + *mode = hubp_3dlut_fl_mode_disable; + *addr_mode = hubp_3dlut_fl_addressing_mode_sw_linear; + break; + } +} + +static void dc_get_lut_format( + enum dc_cm2_gpu_mem_format dc_format, + enum hubp_3dlut_fl_format *format) +{ + switch (dc_format) { + case DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12MSB: + *format = hubp_3dlut_fl_format_unorm_12msb_bitslice; + break; + case DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12LSB: + *format = hubp_3dlut_fl_format_unorm_12lsb_bitslice; + break; + case DC_CM2_GPU_MEM_FORMAT_16161616_FLOAT_FP1_5_10: + *format = hubp_3dlut_fl_format_float_fp1_5_10; + break; + } +} + +static void dc_get_lut_xbar( + enum dc_cm2_gpu_mem_pixel_component_order order, + enum hubp_3dlut_fl_crossbar_bit_slice *cr_r, + enum hubp_3dlut_fl_crossbar_bit_slice *y_g, + enum hubp_3dlut_fl_crossbar_bit_slice *cb_b) +{ + switch (order) { + case DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_RGBA: + *cr_r = hubp_3dlut_fl_crossbar_bit_slice_32_47; + *y_g = hubp_3dlut_fl_crossbar_bit_slice_16_31; + *cb_b = hubp_3dlut_fl_crossbar_bit_slice_0_15; + break; + case DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_BGRA: + *cr_r = hubp_3dlut_fl_crossbar_bit_slice_0_15; + *y_g = hubp_3dlut_fl_crossbar_bit_slice_16_31; + *cb_b = hubp_3dlut_fl_crossbar_bit_slice_32_47; + break; + } +} + +static void dc_get_lut_width( + enum dc_cm2_gpu_mem_size size, + enum hubp_3dlut_fl_width *width) +{ + switch (size) { + case DC_CM2_GPU_MEM_SIZE_333333: + *width = hubp_3dlut_fl_width_33; + break; + case DC_CM2_GPU_MEM_SIZE_171717: + *width = hubp_3dlut_fl_width_17; + break; + case DC_CM2_GPU_MEM_SIZE_TRANSFORMED: + *width = hubp_3dlut_fl_width_transformed; + break; + } +} +static bool dc_is_rmcm_3dlut_supported(struct hubp *hubp, struct mpc *mpc) +{ + if (mpc->funcs->rmcm.update_3dlut_fast_load_select && + mpc->funcs->rmcm.program_lut_read_write_control && + hubp->funcs->hubp_program_3dlut_fl_addr && + mpc->funcs->rmcm.program_bit_depth && + hubp->funcs->hubp_program_3dlut_fl_mode && + hubp->funcs->hubp_program_3dlut_fl_addressing_mode && + hubp->funcs->hubp_program_3dlut_fl_format && + hubp->funcs->hubp_update_3dlut_fl_bias_scale && + mpc->funcs->rmcm.program_bias_scale && + hubp->funcs->hubp_program_3dlut_fl_crossbar && + hubp->funcs->hubp_program_3dlut_fl_width && + 
mpc->funcs->rmcm.update_3dlut_fast_load_select && + mpc->funcs->rmcm.populate_lut && + mpc->funcs->rmcm.program_lut_mode && + hubp->funcs->hubp_enable_3dlut_fl && + mpc->funcs->rmcm.enable_3dlut_fl) + return true; + + return false; +} + +bool dcn401_program_rmcm_luts( + struct hubp *hubp, + struct pipe_ctx *pipe_ctx, + enum dc_cm2_transfer_func_source lut3d_src, + struct dc_cm2_func_luts *mcm_luts, + struct mpc *mpc, + bool lut_bank_a, + int mpcc_id) +{ + struct dpp *dpp_base = pipe_ctx->plane_res.dpp; + union mcm_lut_params m_lut_params; + enum MCM_LUT_XABLE shaper_xable, lut3d_xable = MCM_LUT_DISABLE, lut1d_xable; + enum hubp_3dlut_fl_mode mode; + enum hubp_3dlut_fl_addressing_mode addr_mode; + enum hubp_3dlut_fl_format format; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_y_g; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cb_b; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cr_r; + enum hubp_3dlut_fl_width width; + struct dc *dc = hubp->ctx->dc; + + bool bypass_rmcm_3dlut = false; + bool bypass_rmcm_shaper = false; + + dcn401_get_mcm_lut_xable_from_pipe_ctx(dc, pipe_ctx, &shaper_xable, &lut3d_xable, &lut1d_xable); + + /* 3DLUT */ + switch (lut3d_src) { + case DC_CM2_TRANSFER_FUNC_SOURCE_SYSMEM: + memset(&m_lut_params, 0, sizeof(m_lut_params)); + // Don't know what to do in this case. + //case DC_CM2_TRANSFER_FUNC_SOURCE_SYSMEM: + break; + case DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM: + dc_get_lut_width(mcm_luts->lut3d_data.gpu_mem_params.size, &width); + if (!dc_is_rmcm_3dlut_supported(hubp, mpc) || + !mpc->funcs->rmcm.is_config_supported(width)) + return false; + + //0. disable fl on mpc + mpc->funcs->update_3dlut_fast_load_select(mpc, mpcc_id, 0xF); + + //1. power down the block + mpc->funcs->rmcm.power_on_shaper_3dlut(mpc, mpcc_id, false); + + //2. program RMCM + //2a. 3dlut reg programming + mpc->funcs->rmcm.program_lut_read_write_control(mpc, MCM_LUT_3DLUT, lut_bank_a, + (!bypass_rmcm_3dlut) && lut3d_xable != MCM_LUT_DISABLE, mpcc_id); + + hubp->funcs->hubp_program_3dlut_fl_addr(hubp, + mcm_luts->lut3d_data.gpu_mem_params.addr); + + mpc->funcs->rmcm.program_bit_depth(mpc, + mcm_luts->lut3d_data.gpu_mem_params.bit_depth, mpcc_id); + + // setting native or transformed mode, + dc_get_lut_mode(mcm_luts->lut3d_data.gpu_mem_params.layout, &mode, &addr_mode); + + //these program the mcm 3dlut + hubp->funcs->hubp_program_3dlut_fl_mode(hubp, mode); + + hubp->funcs->hubp_program_3dlut_fl_addressing_mode(hubp, addr_mode); + + //seems to be only for the MCM + dc_get_lut_format(mcm_luts->lut3d_data.gpu_mem_params.format_params.format, &format); + hubp->funcs->hubp_program_3dlut_fl_format(hubp, format); + + mpc->funcs->rmcm.program_bias_scale(mpc, + mcm_luts->lut3d_data.gpu_mem_params.format_params.float_params.bias, + mcm_luts->lut3d_data.gpu_mem_params.format_params.float_params.scale, + mpcc_id); + hubp->funcs->hubp_update_3dlut_fl_bias_scale(hubp, + mcm_luts->lut3d_data.gpu_mem_params.format_params.float_params.bias, + mcm_luts->lut3d_data.gpu_mem_params.format_params.float_params.scale); + + dc_get_lut_xbar( + mcm_luts->lut3d_data.gpu_mem_params.component_order, + &crossbar_bit_slice_cr_r, + &crossbar_bit_slice_y_g, + &crossbar_bit_slice_cb_b); + + hubp->funcs->hubp_program_3dlut_fl_crossbar(hubp, + crossbar_bit_slice_cr_r, + crossbar_bit_slice_y_g, + crossbar_bit_slice_cb_b); + + mpc->funcs->rmcm.program_3dlut_size(mpc, width, mpcc_id); + + mpc->funcs->update_3dlut_fast_load_select(mpc, mpcc_id, hubp->inst); + + //2b. 
shaper reg programming + memset(&m_lut_params, 0, sizeof(m_lut_params)); + + if (mcm_luts->shaper->type == TF_TYPE_HWPWL) { + m_lut_params.pwl = &mcm_luts->shaper->pwl; + } else if (mcm_luts->shaper->type == TF_TYPE_DISTRIBUTED_POINTS) { + ASSERT(false); + cm_helper_translate_curve_to_hw_format( + dc->ctx, + mcm_luts->shaper, + &dpp_base->regamma_params, true); + m_lut_params.pwl = &dpp_base->regamma_params; + } + if (m_lut_params.pwl) { + mpc->funcs->rmcm.populate_lut(mpc, m_lut_params, lut_bank_a, mpcc_id); + mpc->funcs->rmcm.program_lut_mode(mpc, !bypass_rmcm_shaper, lut_bank_a, mpcc_id); + } else { + //RMCM 3dlut won't work without its shaper + return false; + } + + //3. Select the hubp connected to this RMCM + hubp->funcs->hubp_enable_3dlut_fl(hubp, true); + mpc->funcs->rmcm.enable_3dlut_fl(mpc, true, mpcc_id); + + //4. power on the block + if (m_lut_params.pwl) + mpc->funcs->rmcm.power_on_shaper_3dlut(mpc, mpcc_id, true); + + break; + default: + return false; + } + + return true; +} + void dcn401_populate_mcm_luts(struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_cm2_func_luts mcm_luts, @@ -407,9 +650,9 @@ void dcn401_populate_mcm_luts(struct dc *dc, struct mpc *mpc = dc->res_pool->mpc; union mcm_lut_params m_lut_params; enum dc_cm2_transfer_func_source lut3d_src = mcm_luts.lut3d_data.lut3d_src; - enum hubp_3dlut_fl_format format; + enum hubp_3dlut_fl_format format = 0; enum hubp_3dlut_fl_mode mode; - enum hubp_3dlut_fl_width width; + enum hubp_3dlut_fl_width width = 0; enum hubp_3dlut_fl_addressing_mode addr_mode; enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_y_g; enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cb_b; @@ -417,11 +660,29 @@ void dcn401_populate_mcm_luts(struct dc *dc, enum MCM_LUT_XABLE shaper_xable = MCM_LUT_DISABLE; enum MCM_LUT_XABLE lut3d_xable = MCM_LUT_DISABLE; enum MCM_LUT_XABLE lut1d_xable = MCM_LUT_DISABLE; - bool is_17x17x17 = true; bool rval; dcn401_get_mcm_lut_xable_from_pipe_ctx(dc, pipe_ctx, &shaper_xable, &lut3d_xable, &lut1d_xable); + //MCM - setting its location (Before/After) blender + //set to post blend (true) + dcn401_set_mcm_location_post_blend( + dc, + pipe_ctx, + mcm_luts.lut3d_data.mpc_mcm_post_blend); + + //RMCM - 3dLUT+Shaper + if (mcm_luts.lut3d_data.rmcm_3dlut_enable) { + dcn401_program_rmcm_luts( + hubp, + pipe_ctx, + lut3d_src, + &mcm_luts, + mpc, + lut_bank_a, + mpcc_id); + } + /* 1D LUT */ if (mcm_luts.lut1d_func) { memset(&m_lut_params, 0, sizeof(m_lut_params)); @@ -442,7 +703,7 @@ void dcn401_populate_mcm_luts(struct dc *dc, } /* Shaper */ - if (mcm_luts.shaper) { + if (mcm_luts.shaper && mcm_luts.lut3d_data.mpc_3dlut_enable) { memset(&m_lut_params, 0, sizeof(m_lut_params)); if (mcm_luts.shaper->type == TF_TYPE_HWPWL) m_lut_params.pwl = &mcm_luts.shaper->pwl; @@ -454,11 +715,11 @@ void dcn401_populate_mcm_luts(struct dc *dc, m_lut_params.pwl = rval ? 
&dpp_base->regamma_params : NULL; } if (m_lut_params.pwl) { - if (mpc->funcs->populate_lut) - mpc->funcs->populate_lut(mpc, MCM_LUT_SHAPER, m_lut_params, lut_bank_a, mpcc_id); + if (mpc->funcs->mcm.populate_lut) + mpc->funcs->mcm.populate_lut(mpc, m_lut_params, lut_bank_a, mpcc_id); + if (mpc->funcs->program_lut_mode) + mpc->funcs->program_lut_mode(mpc, MCM_LUT_SHAPER, MCM_LUT_ENABLE, lut_bank_a, mpcc_id); } - if (mpc->funcs->program_lut_mode) - mpc->funcs->program_lut_mode(mpc, MCM_LUT_SHAPER, shaper_xable, lut_bank_a, mpcc_id); } /* 3DLUT */ @@ -467,6 +728,7 @@ void dcn401_populate_mcm_luts(struct dc *dc, memset(&m_lut_params, 0, sizeof(m_lut_params)); if (hubp->funcs->hubp_enable_3dlut_fl) hubp->funcs->hubp_enable_3dlut_fl(hubp, false); + if (mcm_luts.lut3d_data.lut3d_func && mcm_luts.lut3d_data.lut3d_func->state.bits.initialized) { m_lut_params.lut3d = &mcm_luts.lut3d_data.lut3d_func->lut_3d; if (mpc->funcs->populate_lut) @@ -476,16 +738,35 @@ void dcn401_populate_mcm_luts(struct dc *dc, mpcc_id); } break; - case DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM: + case DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM: + switch (mcm_luts.lut3d_data.gpu_mem_params.size) { + case DC_CM2_GPU_MEM_SIZE_333333: + width = hubp_3dlut_fl_width_33; + break; + case DC_CM2_GPU_MEM_SIZE_171717: + width = hubp_3dlut_fl_width_17; + break; + case DC_CM2_GPU_MEM_SIZE_TRANSFORMED: + width = hubp_3dlut_fl_width_transformed; + break; + } + + //check for support + if (mpc->funcs->mcm.is_config_supported && + !mpc->funcs->mcm.is_config_supported(width)) + break; if (mpc->funcs->program_lut_read_write_control) mpc->funcs->program_lut_read_write_control(mpc, MCM_LUT_3DLUT, lut_bank_a, mpcc_id); if (mpc->funcs->program_lut_mode) mpc->funcs->program_lut_mode(mpc, MCM_LUT_3DLUT, lut3d_xable, lut_bank_a, mpcc_id); - if (mpc->funcs->program_3dlut_size) - mpc->funcs->program_3dlut_size(mpc, is_17x17x17, mpcc_id); + if (hubp->funcs->hubp_program_3dlut_fl_addr) hubp->funcs->hubp_program_3dlut_fl_addr(hubp, mcm_luts.lut3d_data.gpu_mem_params.addr); + + if (mpc->funcs->mcm.program_bit_depth) + mpc->funcs->mcm.program_bit_depth(mpc, mcm_luts.lut3d_data.gpu_mem_params.bit_depth, mpcc_id); + switch (mcm_luts.lut3d_data.gpu_mem_params.layout) { case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB: mode = hubp_3dlut_fl_mode_native_1; @@ -512,7 +793,6 @@ void dcn401_populate_mcm_luts(struct dc *dc, switch (mcm_luts.lut3d_data.gpu_mem_params.format_params.format) { case DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12MSB: - default: format = hubp_3dlut_fl_format_unorm_12msb_bitslice; break; case DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12LSB: @@ -524,37 +804,37 @@ void dcn401_populate_mcm_luts(struct dc *dc, } if (hubp->funcs->hubp_program_3dlut_fl_format) hubp->funcs->hubp_program_3dlut_fl_format(hubp, format); - if (hubp->funcs->hubp_update_3dlut_fl_bias_scale) + if (hubp->funcs->hubp_update_3dlut_fl_bias_scale && + mpc->funcs->mcm.program_bias_scale) { + mpc->funcs->mcm.program_bias_scale(mpc, + mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.bias, + mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.scale, + mpcc_id); hubp->funcs->hubp_update_3dlut_fl_bias_scale(hubp, - mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.bias, - mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.scale); - - switch (mcm_luts.lut3d_data.gpu_mem_params.component_order) { - case DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_RGBA: - default: - crossbar_bit_slice_cr_r = hubp_3dlut_fl_crossbar_bit_slice_0_15; - crossbar_bit_slice_y_g = 
hubp_3dlut_fl_crossbar_bit_slice_16_31; - crossbar_bit_slice_cb_b = hubp_3dlut_fl_crossbar_bit_slice_32_47; - break; + mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.bias, + mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.scale); } + //navi 4x has a bug and r and blue are swapped and need to be worked around here in + //TODO: need to make a method for get_xbar per asic OR do the workaround in program_crossbar for 4x + dc_get_lut_xbar( + mcm_luts.lut3d_data.gpu_mem_params.component_order, + &crossbar_bit_slice_cr_r, + &crossbar_bit_slice_y_g, + &crossbar_bit_slice_cb_b); + if (hubp->funcs->hubp_program_3dlut_fl_crossbar) hubp->funcs->hubp_program_3dlut_fl_crossbar(hubp, + crossbar_bit_slice_cr_r, crossbar_bit_slice_y_g, - crossbar_bit_slice_cb_b, - crossbar_bit_slice_cr_r); + crossbar_bit_slice_cb_b); + + if (mpc->funcs->mcm.program_lut_read_write_control) + mpc->funcs->mcm.program_lut_read_write_control(mpc, MCM_LUT_3DLUT, lut_bank_a, true, mpcc_id); + + if (mpc->funcs->mcm.program_3dlut_size) + mpc->funcs->mcm.program_3dlut_size(mpc, width, mpcc_id); - switch (mcm_luts.lut3d_data.gpu_mem_params.size) { - case DC_CM2_GPU_MEM_SIZE_171717: - default: - width = hubp_3dlut_fl_width_17; - break; - case DC_CM2_GPU_MEM_SIZE_TRANSFORMED: - width = hubp_3dlut_fl_width_transformed; - break; - } - if (hubp->funcs->hubp_program_3dlut_fl_width) - hubp->funcs->hubp_program_3dlut_fl_width(hubp, width); if (mpc->funcs->update_3dlut_fast_load_select) mpc->funcs->update_3dlut_fast_load_select(mpc, mpcc_id, hubp->inst); @@ -2371,7 +2651,7 @@ bool dcn401_update_bandwidth( struct dce_hwseq *hws = dc->hwseq; /* recalculate DML parameters */ - if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) + if (dc->res_pool->funcs->validate_bandwidth(dc, context, false) != DC_OK) return false; /* apply updated bandwidth parameters */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index 781cf0efccc6..ce65b4f6c672 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -109,4 +109,12 @@ void dcn401_detect_pipe_changes( void dcn401_plane_atomic_power_down(struct dc *dc, struct dpp *dpp, struct hubp *hubp); +bool dcn401_program_rmcm_luts( + struct hubp *hubp, + struct pipe_ctx *pipe_ctx, + enum dc_cm2_transfer_func_source lut3d_src, + struct dc_cm2_func_luts *mcm_luts, + struct mpc *mpc, + bool lut_bank_a, + int mpcc_id); #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index c8b5ed834579..3a0795045bc6 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -195,6 +195,8 @@ enum block_sequence_func { DMUB_SUBVP_SAVE_SURF_ADDR, HUBP_WAIT_FOR_DCC_META_PROP, DMUB_FAMS2_GLOBAL_CONTROL_LOCK_FAST, + /* This must be the last value in this enum, add new ones above */ + HWSS_BLOCK_SEQUENCE_FUNC_COUNT }; struct block_sequence { @@ -202,6 +204,8 @@ struct block_sequence { enum block_sequence_func func; }; +#define MAX_HWSS_BLOCK_SEQUENCE_SIZE (HWSS_BLOCK_SEQUENCE_FUNC_COUNT * MAX_PIPES) + struct hw_sequencer_funcs { void (*hardware_release)(struct dc *dc); /* Embedded Display Related */ @@ -534,13 +538,13 @@ void set_drr_and_clear_adjust_pending( struct drr_params *params); void hwss_execute_sequence(struct dc *dc, - struct block_sequence block_sequence[], + struct 
block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE], int num_steps); void hwss_build_fast_sequence(struct dc *dc, struct dc_dmub_cmd *dc_dmub_cmd, unsigned int dmub_cmd_count, - struct block_sequence block_sequence[], + struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE], unsigned int *num_steps, struct pipe_ctx *pipe_ctx, struct dc_stream_status *stream_status, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h index 22a5d4a03c98..09bc65c2fa23 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h @@ -183,6 +183,8 @@ struct hwseq_private_funcs { struct dc_cm2_func_luts mcm_luts, bool lut_bank_a); void (*perform_3dlut_wa_unlock)(struct pipe_ctx *pipe_ctx); + void (*wait_for_pipe_update_if_needed)(struct dc *dc, struct pipe_ctx *pipe_ctx, bool is_surface_update_only); + void (*set_wait_for_update_needed_for_pipe)(struct dc *dc, struct pipe_ctx *pipe_ctx); }; struct dce_hwseq { diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_status.h b/drivers/gpu/drm/amd/display/dc/inc/core_status.h index b5afd8c3103d..f3696143590c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_status.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_status.h @@ -26,6 +26,8 @@ #ifndef _CORE_STATUS_H_ #define _CORE_STATUS_H_ +#include "dc_hw_types.h" + enum dc_status { DC_OK = 1, @@ -56,6 +58,7 @@ enum dc_status { DC_NO_LINK_ENC_RESOURCE = 26, DC_FAIL_DP_PAYLOAD_ALLOCATION = 27, DC_FAIL_DP_LINK_BANDWIDTH = 28, + DC_FAIL_HW_CURSOR_SUPPORT = 29, DC_ERROR_UNEXPECTED = -1 }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index d0021f25f3d8..051bd83680d1 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -78,8 +78,7 @@ struct resource_funcs { /* Create a minimal link encoder object with no dc_link object * associated with it. */ struct link_encoder *(*link_enc_create_minimal)(struct dc_context *ctx, enum engine_id eng_id); - - bool (*validate_bandwidth)( + enum dc_status (*validate_bandwidth)( struct dc *dc, struct dc_state *context, bool fast_validate); @@ -218,6 +217,9 @@ struct resource_funcs { int (*get_power_profile)(const struct dc_state *context); unsigned int (*get_det_buffer_size)(const struct dc_state *context); unsigned int (*get_vstartup_for_pipe)(struct pipe_ctx *pipe_ctx); + unsigned int (*get_max_hw_cursor_size)(const struct dc *dc, + struct dc_state *state, + const struct dc_stream_state *stream); }; struct audio_support{ @@ -480,6 +482,10 @@ struct pipe_ctx { struct pixel_rate_divider pixel_rate_divider; /* pixels borrowed from hblank to hactive */ uint8_t hblank_borrow; + /* next vupdate */ + uint32_t next_vupdate; + uint32_t wait_frame_count; + bool wait_is_required; }; /* Data used for dynamic link encoder assignment. 
@@ -507,7 +513,7 @@ struct resource_context { unsigned int hpo_dp_link_enc_to_link_idx[MAX_HPO_DP2_LINK_ENCODERS]; int hpo_dp_link_enc_ref_cnts[MAX_HPO_DP2_LINK_ENCODERS]; bool is_mpc_3dlut_acquired[MAX_PIPES]; - /* solely used for build scalar data in dml2 */ + /* used to build scalar data in dml2 and for edp backlight programming */ struct pipe_ctx temp_pipe; }; @@ -630,7 +636,7 @@ struct dc_state { */ struct bw_context bw_ctx; - struct block_sequence block_sequence[100]; + struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE]; unsigned int block_sequence_steps; struct dc_dmub_cmd dc_dmub_cmd[10]; unsigned int dmub_cmd_count; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h index 221645c023b5..bac8febad69a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h @@ -199,6 +199,7 @@ enum dentist_divider_range { CLK_SR_DCN35(CLK1_CLK4_ALLOW_DS), \ CLK_SR_DCN35(CLK1_CLK5_ALLOW_DS), \ CLK_SR_DCN35(CLK5_spll_field_8), \ + CLK_SR_DCN35(CLK6_spll_field_8), \ SR(DENTIST_DISPCLK_CNTL), \ #define CLK_COMMON_MASK_SH_LIST_DCN32(mask_sh) \ @@ -307,7 +308,7 @@ struct clk_mgr_registers { uint32_t CLK1_CLK4_ALLOW_DS; uint32_t CLK1_CLK5_ALLOW_DS; uint32_t CLK5_spll_field_8; - + uint32_t CLK6_spll_field_8; }; struct clk_mgr_shift { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h index 3a89cc0cffc1..6e303b81bfb0 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h @@ -967,23 +967,6 @@ struct mpc_funcs { */ void (*update_3dlut_fast_load_select)(struct mpc *mpc, int mpcc_id, int hubp_idx); - /** - * @get_3dlut_fast_load_status: - * - * Get 3D LUT fast load status and reference them with done, soft_underflow and hard_underflow pointers. - * - * Parameters: - * - [in/out] mpc - MPC context. 
- * - [in] mpcc_id - * - [in/out] done - * - [in/out] soft_underflow - * - [in/out] hard_underflow - * - * Return: - * - * void - */ - void (*get_3dlut_fast_load_status)(struct mpc *mpc, int mpcc_id, uint32_t *done, uint32_t *soft_underflow, uint32_t *hard_underflow); /** * @populate_lut: @@ -1054,6 +1037,35 @@ struct mpc_funcs { * void */ void (*program_3dlut_size)(struct mpc *mpc, bool is_17x17x17, int mpcc_id); + + struct { + void (*program_3dlut_size)(struct mpc *mpc, uint32_t width, int mpcc_id); + void (*program_bias_scale)(struct mpc *mpc, uint16_t bias, uint16_t scale, int mpcc_id); + void (*program_bit_depth)(struct mpc *mpc, uint16_t bit_depth, int mpcc_id); + bool (*is_config_supported)(uint32_t width); + void (*program_lut_read_write_control)(struct mpc *mpc, const enum MCM_LUT_ID id, + bool lut_bank_a, bool enabled, int mpcc_id); + + void (*populate_lut)(struct mpc *mpc, const union mcm_lut_params params, + bool lut_bank_a, int mpcc_id); + } mcm; + + struct { + void (*enable_3dlut_fl)(struct mpc *mpc, bool enable, int mpcc_id); + void (*update_3dlut_fast_load_select)(struct mpc *mpc, int mpcc_id, int hubp_idx); + void (*program_lut_read_write_control)(struct mpc *mpc, const enum MCM_LUT_ID id, + bool lut_bank_a, bool enabled, int mpcc_id); + void (*program_lut_mode)(struct mpc *mpc, const enum MCM_LUT_XABLE xable, + bool lut_bank_a, int mpcc_id); + void (*program_3dlut_size)(struct mpc *mpc, uint32_t width, int mpcc_id); + void (*program_bias_scale)(struct mpc *mpc, uint16_t bias, uint16_t scale, int mpcc_id); + void (*program_bit_depth)(struct mpc *mpc, uint16_t bit_depth, int mpcc_id); + bool (*is_config_supported)(uint32_t width); + + void (*power_on_shaper_3dlut)(struct mpc *mpc, uint32_t mpcc_id, bool power_on); + void (*populate_lut)(struct mpc *mpc, const union mcm_lut_params params, + bool lut_bank_a, int mpcc_id); + } rmcm; }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h index 7f371cbb35cd..0d5a8358a778 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h @@ -68,6 +68,7 @@ struct optc { int pstate_keepout; struct dc_crtc_timing orginal_patched_timing; enum signal_type signal; + uint32_t max_frame_count; }; void optc1_read_otg_state(struct timing_generator *optc, struct dcn_otg_state *s); diff --git a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h index a402df225a76..26cb1459b743 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h +++ b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h @@ -508,6 +508,10 @@ uint32_t generic_indirect_reg_update_ex(const struct dc_context *ctx, initial_val, \ n, __VA_ARGS__) +#define IX_REG_SET_SYNC(index, init_value, f1, v1) \ + IX_REG_SET_N_SYNC(index, 1, init_value, \ + FN(reg, f1), v1) + #define IX_REG_SET_2_SYNC(index, init_value, f1, v1, f2, v2) \ IX_REG_SET_N_SYNC(index, 2, init_value, \ FN(reg, f1), v1,\ diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 9458187b834d..7a87a7c07c1b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -628,8 +628,6 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx); -bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_state *stream); - /* Get hw programming parameters container from pipe 
context * @pipe_ctx: pipe context * @dscl_prog_data: struct to hold programmable hw reg values diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c b/drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c index f0ac0aeeac51..f839afacd5a5 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c @@ -191,6 +191,16 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { .ack = NULL }; +static struct irq_source_info_funcs vline1_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + +static struct irq_source_info_funcs vline2_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + #undef BASE_INNER #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg @@ -259,6 +269,13 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = &pflip_irq_info_funcs\ } +#define vblank_int_entry(reg_num)\ + [DC_IRQ_SOURCE_VBLANK1 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_GLOBAL_SYNC_STATUS, VSTARTUP_INT_EN,\ + OTG_GLOBAL_SYNC_STATUS, VSTARTUP_EVENT_CLEAR),\ + .funcs = &vblank_irq_info_funcs\ + } /* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match semantic * of DCE's DC_IRQ_SOURCE_VUPDATEx. */ @@ -270,14 +287,6 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = &vupdate_no_lock_irq_info_funcs\ } -#define vblank_int_entry(reg_num)\ - [DC_IRQ_SOURCE_VBLANK1 + reg_num] = {\ - IRQ_REG_ENTRY(OTG, reg_num,\ - OTG_GLOBAL_SYNC_STATUS, VSTARTUP_INT_EN,\ - OTG_GLOBAL_SYNC_STATUS, VSTARTUP_EVENT_CLEAR),\ - .funcs = &vblank_irq_info_funcs\ -} - #define vline0_int_entry(reg_num)\ [DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\ IRQ_REG_ENTRY(OTG, reg_num,\ @@ -285,6 +294,20 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_CLEAR),\ .funcs = &vline0_irq_info_funcs\ } +#define vline1_int_entry(reg_num)\ + [DC_IRQ_SOURCE_DC1_VLINE1 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE,\ + OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_CLEAR),\ + .funcs = &vline1_irq_info_funcs\ + } +#define vline2_int_entry(reg_num)\ + [DC_IRQ_SOURCE_DC1_VLINE2 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE,\ + OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_CLEAR),\ + .funcs = &vline2_irq_info_funcs\ + } #define dmub_outbox_int_entry()\ [DC_IRQ_SOURCE_DMCUB_OUTBOX] = {\ IRQ_REG_ENTRY_DMUB(\ @@ -387,21 +410,29 @@ irq_source_info_dcn32[DAL_IRQ_SOURCES_NUMBER] = { dc_underflow_int_entry(6), [DC_IRQ_SOURCE_DMCU_SCP] = dummy_irq_entry(), [DC_IRQ_SOURCE_VBIOS_SW] = dummy_irq_entry(), - vupdate_no_lock_int_entry(0), - vupdate_no_lock_int_entry(1), - vupdate_no_lock_int_entry(2), - vupdate_no_lock_int_entry(3), vblank_int_entry(0), vblank_int_entry(1), vblank_int_entry(2), vblank_int_entry(3), + [DC_IRQ_SOURCE_DC5_VLINE1] = dummy_irq_entry(), + [DC_IRQ_SOURCE_DC6_VLINE1] = dummy_irq_entry(), + dmub_outbox_int_entry(), + vupdate_no_lock_int_entry(0), + vupdate_no_lock_int_entry(1), + vupdate_no_lock_int_entry(2), + vupdate_no_lock_int_entry(3), vline0_int_entry(0), vline0_int_entry(1), vline0_int_entry(2), vline0_int_entry(3), - [DC_IRQ_SOURCE_DC5_VLINE1] = dummy_irq_entry(), - [DC_IRQ_SOURCE_DC6_VLINE1] = dummy_irq_entry(), - dmub_outbox_int_entry(), + vline1_int_entry(0), + vline1_int_entry(1), + vline1_int_entry(2), + vline1_int_entry(3), + vline2_int_entry(0), 
+ vline2_int_entry(1), + vline2_int_entry(2), + vline2_int_entry(3) }; static const struct irq_service_funcs irq_service_funcs_dcn32 = { diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c b/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c index b43c9524b0de..8499e505cf3e 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c @@ -171,6 +171,16 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { .ack = NULL }; +static struct irq_source_info_funcs vline1_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + +static struct irq_source_info_funcs vline2_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + #undef BASE_INNER #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg @@ -239,6 +249,13 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = &pflip_irq_info_funcs\ } +#define vblank_int_entry(reg_num)\ + [DC_IRQ_SOURCE_VBLANK1 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_GLOBAL_SYNC_STATUS, VSTARTUP_INT_EN,\ + OTG_GLOBAL_SYNC_STATUS, VSTARTUP_EVENT_CLEAR),\ + .funcs = &vblank_irq_info_funcs\ + } /* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match semantic * of DCE's DC_IRQ_SOURCE_VUPDATEx. */ @@ -250,13 +267,6 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = &vupdate_no_lock_irq_info_funcs\ } -#define vblank_int_entry(reg_num)\ - [DC_IRQ_SOURCE_VBLANK1 + reg_num] = {\ - IRQ_REG_ENTRY(OTG, reg_num,\ - OTG_GLOBAL_SYNC_STATUS, VSTARTUP_INT_EN,\ - OTG_GLOBAL_SYNC_STATUS, VSTARTUP_EVENT_CLEAR),\ - .funcs = &vblank_irq_info_funcs\ - } #define vline0_int_entry(reg_num)\ [DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\ IRQ_REG_ENTRY(OTG, reg_num,\ @@ -264,6 +274,20 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_CLEAR),\ .funcs = &vline0_irq_info_funcs\ } +#define vline1_int_entry(reg_num)\ + [DC_IRQ_SOURCE_DC1_VLINE1 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE,\ + OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_CLEAR),\ + .funcs = &vline1_irq_info_funcs\ + } +#define vline2_int_entry(reg_num)\ + [DC_IRQ_SOURCE_DC1_VLINE2 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE,\ + OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_CLEAR),\ + .funcs = &vline2_irq_info_funcs\ + } #define dmub_outbox_int_entry()\ [DC_IRQ_SOURCE_DMCUB_OUTBOX] = {\ IRQ_REG_ENTRY_DMUB(\ @@ -364,21 +388,29 @@ irq_source_info_dcn401[DAL_IRQ_SOURCES_NUMBER] = { dc_underflow_int_entry(6), [DC_IRQ_SOURCE_DMCU_SCP] = dummy_irq_entry(), [DC_IRQ_SOURCE_VBIOS_SW] = dummy_irq_entry(), - vupdate_no_lock_int_entry(0), - vupdate_no_lock_int_entry(1), - vupdate_no_lock_int_entry(2), - vupdate_no_lock_int_entry(3), vblank_int_entry(0), vblank_int_entry(1), vblank_int_entry(2), vblank_int_entry(3), + [DC_IRQ_SOURCE_DC5_VLINE1] = dummy_irq_entry(), + [DC_IRQ_SOURCE_DC6_VLINE1] = dummy_irq_entry(), + dmub_outbox_int_entry(), + vupdate_no_lock_int_entry(0), + vupdate_no_lock_int_entry(1), + vupdate_no_lock_int_entry(2), + vupdate_no_lock_int_entry(3), vline0_int_entry(0), vline0_int_entry(1), vline0_int_entry(2), vline0_int_entry(3), - [DC_IRQ_SOURCE_DC5_VLINE1] = dummy_irq_entry(), - [DC_IRQ_SOURCE_DC6_VLINE1] = dummy_irq_entry(), - dmub_outbox_int_entry(), + vline1_int_entry(0), + vline1_int_entry(1), + 
vline1_int_entry(2), + vline1_int_entry(3), + vline2_int_entry(0), + vline2_int_entry(1), + vline2_int_entry(2), + vline2_int_entry(3), }; static const struct irq_service_funcs irq_service_funcs_dcn401 = { diff --git a/drivers/gpu/drm/amd/display/dc/irq_types.h b/drivers/gpu/drm/amd/display/dc/irq_types.h index 110f656d43ae..a2f7b933bebf 100644 --- a/drivers/gpu/drm/amd/display/dc/irq_types.h +++ b/drivers/gpu/drm/amd/display/dc/irq_types.h @@ -161,6 +161,20 @@ enum dc_irq_source { DC_IRQ_SOURCE_DPCX_TX_PHYE, DC_IRQ_SOURCE_DPCX_TX_PHYF, + DC_IRQ_SOURCE_DC1_VLINE2, + DC_IRQ_SOURCE_DC2_VLINE2, + DC_IRQ_SOURCE_DC3_VLINE2, + DC_IRQ_SOURCE_DC4_VLINE2, + DC_IRQ_SOURCE_DC5_VLINE2, + DC_IRQ_SOURCE_DC6_VLINE2, + + DC_IRQ_SOURCE_DCI2C_RR_DDC1, + DC_IRQ_SOURCE_DCI2C_RR_DDC2, + DC_IRQ_SOURCE_DCI2C_RR_DDC3, + DC_IRQ_SOURCE_DCI2C_RR_DDC4, + DC_IRQ_SOURCE_DCI2C_RR_DDC5, + DC_IRQ_SOURCE_DCI2C_RR_DDC6, + DAL_IRQ_SOURCES_NUMBER }; @@ -170,6 +184,8 @@ enum irq_type IRQ_TYPE_VUPDATE = DC_IRQ_SOURCE_VUPDATE1, IRQ_TYPE_VBLANK = DC_IRQ_SOURCE_VBLANK1, IRQ_TYPE_VLINE0 = DC_IRQ_SOURCE_DC1_VLINE0, + IRQ_TYPE_VLINE1 = DC_IRQ_SOURCE_DC1_VLINE1, + IRQ_TYPE_VLINE2 = DC_IRQ_SOURCE_DC1_VLINE2, IRQ_TYPE_DCUNDERFLOW = DC_IRQ_SOURCE_DC1UNDERFLOW, }; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index f6b6b19e7481..0125f2cfc114 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -464,6 +464,7 @@ static bool construct_phy(struct dc_link *link, link->irq_source_hpd = DC_IRQ_SOURCE_INVALID; link->irq_source_hpd_rx = DC_IRQ_SOURCE_INVALID; + link->irq_source_read_request = DC_IRQ_SOURCE_INVALID; link->link_status.dpcd_caps = &link->dpcd_caps; link->dc = init_params->dc; @@ -514,6 +515,9 @@ static bool construct_phy(struct dc_link *link, case CONNECTOR_ID_HDMI_TYPE_A: link->connector_signal = SIGNAL_TYPE_HDMI_TYPE_A; + if (link->hpd_gpio) + link->irq_source_read_request = + dal_irq_get_read_request(link->hpd_gpio); break; case CONNECTOR_ID_SINGLE_LINK_DVID: case CONNECTOR_ID_SINGLE_LINK_DVII: @@ -653,7 +657,7 @@ static bool construct_phy(struct dc_link *link, } /* Look for device tag that matches connector signal, - * CRT for rgb, LCD for other supported signal tyes + * CRT for rgb, LCD for other supported signal types */ if (!bp_funcs->is_device_id_supported(dc_ctx->dc_bios, link->device_tag.dev_id)) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c index 0d123e647652..c149210096ac 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c @@ -92,6 +92,7 @@ bool dpia_query_hpd_status(struct dc_link *link) /* prepare QUERY_HPD command */ cmd.query_hpd.header.type = DMUB_CMD__QUERY_HPD_STATE; + cmd.query_hpd.header.payload_bytes = sizeof(cmd.query_hpd.data); cmd.query_hpd.data.instance = link->link_id.enum_id - ENUM_ID_1; cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c index 5be00e4ce10b..991b8ad4984b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c @@ -229,6 +229,10 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link) 
link->replay_settings.config.replay_error_status.raw |= replay_error_status.raw; + /* Increment desync error counter if a desync error is detected */ + if (replay_configuration.bits.DESYNC_ERROR_STATUS) + link->replay_settings.replay_desync_error_fail_count++; + if (link->replay_settings.config.force_disable_desync_error_check) return; @@ -240,9 +244,6 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link) &replay_configuration.raw, sizeof(replay_configuration.raw)); - /* Update desync error counter */ - link->replay_settings.replay_desync_error_fail_count++; - /* Acknowledge and clear error bits */ dm_helpers_dp_write_dpcd( link->ctx, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 1e4adbc764ea..da74c2b5854f 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -524,7 +524,7 @@ bool edp_set_backlight_level(const struct dc_link *link, struct dc *dc = link->ctx->dc; uint32_t backlight_pwm_u16_16 = backlight_level_params->backlight_pwm_u16_16; uint32_t frame_ramp = backlight_level_params->frame_ramp; - DC_LOGGER_INIT(link->ctx->logger); + DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n", backlight_pwm_u16_16, backlight_pwm_u16_16); @@ -1022,6 +1022,9 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream &alpm_config.raw, sizeof(alpm_config.raw)); } + + link->replay_settings.config.replay_video_conferencing_optimization_enabled = false; + return true; } @@ -1130,11 +1133,11 @@ static struct abm *get_abm_from_stream_res(const struct dc_link *link) struct abm *abm = NULL; for (i = 0; i < MAX_PIPES; i++) { - struct pipe_ctx pipe_ctx = dc->current_state->res_ctx.pipe_ctx[i]; - struct dc_stream_state *stream = pipe_ctx.stream; + struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + struct dc_stream_state *stream = pipe_ctx->stream; if (stream && stream->link == link) { - abm = pipe_ctx.stream_res.abm; + abm = pipe_ctx->stream_res.abm; break; } } diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c index a0e9e9f0441a..b4cea2b8cb2a 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c @@ -370,275 +370,279 @@ void mpc32_program_shaper_luta_settings( MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].red.custom_float_y); curve = params->arr_curve_points; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_0_1[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_2_3[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_4_5[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, 
curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_6_7[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_8_9[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_10_11[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_12_13[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_14_15[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_16_17[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_18_19[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_20_21[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_22_23[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_24_25[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, 
curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_26_27[mpcc_id], 0, + if (curve) { + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_0_1[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_28_29[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_30_31[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_32_33[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); -} - - -void mpc32_program_shaper_lutb_settings( - struct mpc *mpc, - const struct pwl_params *params, - uint32_t mpcc_id) -{ - const struct gamma_curve *curve; - struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); - - REG_SET_2(MPCC_MCM_SHAPER_RAMB_START_CNTL_B[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_B, params->corner_points[0].blue.custom_float_x, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_SEGMENT_B, 0); - REG_SET_2(MPCC_MCM_SHAPER_RAMB_START_CNTL_G[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_B, params->corner_points[0].green.custom_float_x, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_SEGMENT_B, 0); - REG_SET_2(MPCC_MCM_SHAPER_RAMB_START_CNTL_R[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_B, params->corner_points[0].red.custom_float_x, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_SEGMENT_B, 0); - - REG_SET_2(MPCC_MCM_SHAPER_RAMB_END_CNTL_B[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_B, params->corner_points[1].blue.custom_float_x, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].blue.custom_float_y); - REG_SET_2(MPCC_MCM_SHAPER_RAMB_END_CNTL_G[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_B, params->corner_points[1].green.custom_float_x, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].green.custom_float_y); - REG_SET_2(MPCC_MCM_SHAPER_RAMB_END_CNTL_R[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_B, params->corner_points[1].red.custom_float_x, - MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].red.custom_float_y); - - curve = params->arr_curve_points; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_0_1[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_2_3[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_2_3[mpcc_id], 0, 
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_4_5[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_4_5[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_6_7[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_6_7[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_8_9[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_8_9[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_10_11[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_10_11[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_12_13[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_12_13[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_14_15[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_14_15[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_16_17[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_16_17[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_18_19[mpcc_id], 0, + curve += 2; + 
REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_18_19[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_20_21[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_20_21[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_22_23[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_22_23[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_24_25[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_24_25[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_26_27[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_26_27[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_28_29[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_28_29[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_30_31[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_30_31[mpcc_id], 0, + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_32_33[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + } +} + + +void mpc32_program_shaper_lutb_settings( + struct mpc *mpc, + const struct pwl_params *params, + uint32_t mpcc_id) +{ + const struct gamma_curve *curve; + struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); + + REG_SET_2(MPCC_MCM_SHAPER_RAMB_START_CNTL_B[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_B, params->corner_points[0].blue.custom_float_x, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_SEGMENT_B, 0); + 
REG_SET_2(MPCC_MCM_SHAPER_RAMB_START_CNTL_G[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_B, params->corner_points[0].green.custom_float_x, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_SEGMENT_B, 0); + REG_SET_2(MPCC_MCM_SHAPER_RAMB_START_CNTL_R[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_B, params->corner_points[0].red.custom_float_x, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_SEGMENT_B, 0); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_32_33[mpcc_id], 0, + REG_SET_2(MPCC_MCM_SHAPER_RAMB_END_CNTL_B[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_B, params->corner_points[1].blue.custom_float_x, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].blue.custom_float_y); + REG_SET_2(MPCC_MCM_SHAPER_RAMB_END_CNTL_G[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_B, params->corner_points[1].green.custom_float_x, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].green.custom_float_y); + REG_SET_2(MPCC_MCM_SHAPER_RAMB_END_CNTL_R[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_B, params->corner_points[1].red.custom_float_x, + MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].red.custom_float_y); + + curve = params->arr_curve_points; + if (curve) { + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_0_1[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_2_3[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_4_5[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_6_7[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_8_9[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_10_11[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_12_13[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, 
curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_14_15[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_16_17[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_18_19[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_20_21[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_22_23[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_24_25[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_26_27[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_28_29[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_30_31[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_32_33[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, 
curve[1].segments_num); + } } diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c index ad67197557ca..98cf0cbd59ba 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c @@ -47,16 +47,6 @@ void mpc401_update_3dlut_fast_load_select(struct mpc *mpc, int mpcc_id, int hubp REG_SET(MPCC_MCM_3DLUT_FAST_LOAD_SELECT[mpcc_id], 0, MPCC_MCM_3DLUT_FL_SEL, hubp_idx); } -void mpc401_get_3dlut_fast_load_status(struct mpc *mpc, int mpcc_id, uint32_t *done, uint32_t *soft_underflow, uint32_t *hard_underflow) -{ - struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); - - REG_GET_3(MPCC_MCM_3DLUT_FAST_LOAD_STATUS[mpcc_id], - MPCC_MCM_3DLUT_FL_DONE, done, - MPCC_MCM_3DLUT_FL_SOFT_UNDERFLOW, soft_underflow, - MPCC_MCM_3DLUT_FL_HARD_UNDERFLOW, hard_underflow); -} - void mpc401_set_movable_cm_location(struct mpc *mpc, enum mpcc_movable_cm_location location, int mpcc_id) { struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); @@ -618,7 +608,6 @@ static const struct mpc_funcs dcn401_mpc_funcs = { .set_bg_color = mpc1_set_bg_color, .set_movable_cm_location = mpc401_set_movable_cm_location, .update_3dlut_fast_load_select = mpc401_update_3dlut_fast_load_select, - .get_3dlut_fast_load_status = mpc401_get_3dlut_fast_load_status, .populate_lut = mpc401_populate_lut, .program_lut_read_write_control = mpc401_program_lut_read_write_control, .program_lut_mode = mpc401_program_lut_mode, diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h index ce6fbcf14d7a..8e35ebc603a9 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h @@ -241,23 +241,9 @@ void mpc401_update_3dlut_fast_load_select( int mpcc_id, int hubp_idx); -void mpc401_get_3dlut_fast_load_status( - struct mpc *mpc, - int mpcc_id, - uint32_t *done, - uint32_t *soft_underflow, - uint32_t *hard_underflow); - void mpc401_update_3dlut_fast_load_select( struct mpc *mpc, int mpcc_id, int hubp_idx); -void mpc401_get_3dlut_fast_load_status( - struct mpc *mpc, - int mpcc_id, - uint32_t *done, - uint32_t *soft_underflow, - uint32_t *hard_underflow); - #endif diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c index b86fe2b094f8..4cfc6c0fa147 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c @@ -507,6 +507,7 @@ void dcn35_timing_generator_init(struct optc *optc1) optc1->min_v_blank_interlace = 5; optc1->min_h_sync_width = 4; optc1->min_v_sync_width = 1; + optc1->max_frame_count = 0xFFFFFF; dcn35_timing_generator_set_fgcg( optc1, CTX->dc->debug.enable_fine_grain_clock_gating.bits.optc); diff --git a/drivers/gpu/drm/amd/display/dc/resource/Makefile b/drivers/gpu/drm/amd/display/dc/resource/Makefile index b8cddef6b3d2..5b42da8b79c2 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/Makefile +++ b/drivers/gpu/drm/amd/display/dc/resource/Makefile @@ -27,6 +27,24 @@ # DCE ############################################################################### +ifdef CONFIG_DRM_AMD_DC_SI +RESOURCE_DCE60 = dce60_resource.o + +AMD_DAL_RESOURCE_DCE60 = $(addprefix $(AMDDALPATH)/dc/resource/dce60/,$(RESOURCE_DCE60)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE60) +endif + +############################################################################### + +RESOURCE_DCE80 
= dce80_resource.o + +AMD_DAL_RESOURCE_DCE80 = $(addprefix $(AMDDALPATH)/dc/resource/dce80/,$(RESOURCE_DCE80)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE80) + +############################################################################### + RESOURCE_DCE100 = dce100_resource.o AMD_DAL_RESOURCE_DCE100 = $(addprefix $(AMDDALPATH)/dc/resource/dce100/,$(RESOURCE_DCE100)) @@ -57,14 +75,6 @@ AMD_DAL_RESOURCE_DCE120 = $(addprefix $(AMDDALPATH)/dc/resource/dce120/,$(RESOUR AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE120) -############################################################################### - -RESOURCE_DCE80 = dce80_resource.o - -AMD_DAL_RESOURCE_DCE80 = $(addprefix $(AMDDALPATH)/dc/resource/dce80/,$(RESOURCE_DCE80)) - -AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE80) - ifdef CONFIG_DRM_AMD_DC_FP ############################################################################### # DCN diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c index e698543ec937..84f73fdb0f95 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c @@ -836,7 +836,7 @@ static enum dc_status build_mapped_resource( return DC_OK; } -static bool dce100_validate_bandwidth( +static enum dc_status dce100_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate) @@ -858,7 +858,7 @@ static bool dce100_validate_bandwidth( context->bw_ctx.bw.dce.yclk_khz = 0; } - return true; + return DC_OK; } static bool dce100_validate_surface_sets( @@ -1069,7 +1069,7 @@ static bool dce100_resource_construct( pool->base.timing_generator_count = pool->base.res_cap->num_timing_generator; dc->caps.max_downscale_ratio = 200; dc->caps.i2c_speed_in_khz = 40; - dc->caps.i2c_speed_in_khz = 40; + dc->caps.i2c_speed_in_khz_hdcp = 40; dc->caps.max_cursor_size = 128; dc->caps.min_horizontal_blanking_period = 80; dc->caps.dual_link_dvi = true; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c index 035c6cfdaee5..f3d5baac11bf 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c @@ -960,7 +960,7 @@ static enum dc_status build_mapped_resource( return DC_OK; } -static bool dce110_validate_bandwidth( +static enum dc_status dce110_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate) @@ -1031,7 +1031,7 @@ static bool dce110_validate_bandwidth( context->bw_ctx.bw.dce.yclk_khz, context->bw_ctx.bw.dce.blackout_recovery_time_us); } - return result; + return result ? 
DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } static enum dc_status dce110_validate_plane(const struct dc_plane_state *plane_state, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c index 480a50967385..4225cae68c10 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c @@ -883,7 +883,7 @@ static enum dc_status build_mapped_resource( return DC_OK; } -bool dce112_validate_bandwidth( +enum dc_status dce112_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate) @@ -952,7 +952,7 @@ bool dce112_validate_bandwidth( context->bw_ctx.bw.dce.yclk_khz, context->bw_ctx.bw.dce.blackout_recovery_time_us); } - return result; + return result ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } enum dc_status resource_map_phy_clock_resources( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h index 1f57ebc6f9b4..6221d749246d 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h @@ -42,7 +42,7 @@ enum dc_status dce112_validate_with_context( struct dc_state *context, struct dc_state *old_context); -bool dce112_validate_bandwidth( +enum dc_status dce112_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate); diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c index 889f314cac65..d9ffdded5ce1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c @@ -48,7 +48,7 @@ #include "dce/dce_clock_source.h" #include "dce/dce_audio.h" #include "dce/dce_hwseq.h" -#include "dce60/dce60_hw_sequencer.h" +#include "dce60/dce60_hwseq.h" #include "dce100/dce100_resource.h" #include "dce/dce_panel_cntl.h" @@ -863,7 +863,7 @@ static void dce60_resource_destruct(struct dce110_resource_pool *pool) } } -static bool dce60_validate_bandwidth( +static enum dc_status dce60_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate) @@ -885,7 +885,7 @@ static bool dce60_validate_bandwidth( context->bw_ctx.bw.dce.yclk_khz = 0; } - return true; + return DC_OK; } static bool dce60_validate_surface_sets( diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.h index 5d653a76b0b0..5d653a76b0b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.h diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c index 3d5113f010bb..bd5811f97531 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c @@ -869,7 +869,7 @@ static void dce80_resource_destruct(struct dce110_resource_pool *pool) } } -static bool dce80_validate_bandwidth( +static enum dc_status dce80_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate) @@ -891,7 +891,7 @@ static bool dce80_validate_bandwidth( context->bw_ctx.bw.dce.yclk_khz = 0; } - return true; + return DC_OK; } static bool dce80_validate_surface_sets( diff --git 
a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c index e92f14d50adb..be4ade0853e9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c @@ -23,6 +23,7 @@ * */ +#include "core_status.h" #include "dm_services.h" #include "dc.h" @@ -1125,7 +1126,7 @@ static void dcn10_destroy_resource_pool(struct resource_pool **pool) *pool = NULL; } -static bool dcn10_validate_bandwidth( +static enum dc_status dcn10_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate) @@ -1136,7 +1137,7 @@ static bool dcn10_validate_bandwidth( voltage_supported = dcn_validate_bandwidth(dc, context, fast_validate); DC_FP_END(); - return voltage_supported; + return voltage_supported ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } static enum dc_status dcn10_validate_plane(const struct dc_plane_state *plane_state, struct dc_caps *caps) @@ -1245,6 +1246,10 @@ struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link( if (link->ep_type == DISPLAY_ENDPOINT_PHY && pool->stream_enc[i]->id == link->link_enc->preferred_engine) return pool->stream_enc[i]; + + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && pool->stream_enc[i]->id == + link->dpia_preferred_eng_id) + return pool->stream_enc[i]; } } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index e4eca3e32c1b..3405be07f5e3 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -2124,7 +2124,7 @@ validate_out: return out; } -bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, +enum dc_status dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { bool voltage_supported; @@ -2132,14 +2132,14 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, pipes = kcalloc(dc->res_pool->pipe_count, sizeof(display_e2e_pipe_params_st), GFP_KERNEL); if (!pipes) - return false; + return DC_FAIL_BANDWIDTH_VALIDATE; DC_FP_START(); voltage_supported = dcn20_validate_bandwidth_fp(dc, context, fast_validate, pipes); DC_FP_END(); kfree(pipes); - return voltage_supported; + return voltage_supported ? 
DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } struct pipe_ctx *dcn20_acquire_free_pipe_for_layer( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h index 4cee3fa11a7f..c0e062c7407d 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h @@ -119,7 +119,7 @@ void dcn20_set_mcif_arb_params( struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt); -bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); +enum dc_status dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); void dcn20_merge_pipes_for_validate( struct dc *dc, struct dc_state *context); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c index 4bd5c2278596..9ab01b65b177 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c @@ -923,7 +923,7 @@ validate_out: * with DC_FP_START()/DC_FP_END(). Use the same approach as for * dcn20_validate_bandwidth in dcn20_resource.c. */ -static bool dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context, +static enum dc_status dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { bool voltage_supported; @@ -931,14 +931,14 @@ static bool dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context, pipes = kcalloc(dc->res_pool->pipe_count, sizeof(display_e2e_pipe_params_st), GFP_KERNEL); if (!pipes) - return false; + return DC_FAIL_BANDWIDTH_VALIDATE; DC_FP_START(); voltage_supported = dcn21_validate_bandwidth_fp(dc, context, fast_validate, pipes); DC_FP_END(); kfree(pipes); - return voltage_supported; + return voltage_supported ? DC_OK : DC_NOT_SUPPORTED; } static void dcn21_destroy_resource_pool(struct resource_pool **pool) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c index f01ced015072..f631ae34e320 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c @@ -1891,8 +1891,6 @@ static int get_refresh_rate(struct dc_state *context) /* check if refresh rate at least 120hz */ timing = &context->streams[0]->timing; - if (timing == NULL) - return 0; h_v_total = timing->h_total * timing->v_total; if (h_v_total == 0) @@ -2037,7 +2035,7 @@ void dcn30_calculate_wm_and_dlg( DC_FP_END(); } -bool dcn30_validate_bandwidth(struct dc *dc, +enum dc_status dcn30_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { @@ -2094,7 +2092,7 @@ validate_out: BW_VAL_TRACE_FINISH(); - return out; + return out ? 
DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h index 8e6b8b7368fd..689d9bdace81 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h @@ -56,7 +56,7 @@ unsigned int dcn30_calc_max_scaled_time( enum mmhubbub_wbif_mode mode, unsigned int urgent_watermark); -bool dcn30_validate_bandwidth(struct dc *dc, struct dc_state *context, +enum dc_status dcn30_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); bool dcn30_internal_validate_bw( struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index dddddbfef85f..7e0af5297dc4 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -1758,7 +1758,7 @@ dcn31_set_mcif_arb_params(struct dc *dc, DC_FP_END(); } -bool dcn31_validate_bandwidth(struct dc *dc, +enum dc_status dcn31_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { @@ -1813,7 +1813,7 @@ validate_out: BW_VAL_TRACE_FINISH(); - return out; + return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } static void dcn31_get_panel_config_defaults(struct dc_panel_config *panel_config) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h index 551ad912f7be..dd82815d7efe 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h @@ -37,7 +37,7 @@ struct dcn31_resource_pool { struct resource_pool base; }; -bool dcn31_validate_bandwidth(struct dc *dc, +enum dc_status dcn31_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); void dcn31_calculate_wm_and_dlg( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index 26becc4cb804..d96bc6cb73ad 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -1694,7 +1694,7 @@ static void dcn314_get_panel_config_defaults(struct dc_panel_config *panel_confi *panel_config = panel_config_defaults; } -bool dcn314_validate_bandwidth(struct dc *dc, +enum dc_status dcn314_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { @@ -1750,7 +1750,7 @@ validate_out: BW_VAL_TRACE_FINISH(); - return out; + return out ? 
DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } static struct resource_funcs dcn314_res_pool_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h index 49ffe71018df..f8ba531d6342 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h @@ -39,7 +39,7 @@ struct dcn314_resource_pool { struct resource_pool base; }; -bool dcn314_validate_bandwidth(struct dc *dc, +enum dc_status dcn314_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 2a59cc61ed8c..bb0dae0be5b8 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -24,6 +24,7 @@ * */ +#include "dc_types.h" #include "dm_services.h" #include "dc.h" @@ -1806,19 +1807,56 @@ validate_out: return out; } -bool dcn32_validate_bandwidth(struct dc *dc, +enum dc_status dcn32_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { - bool out = false; + unsigned int i; + enum dc_status status; + const struct dc_stream_state *stream; + + /* reset cursor limitations on subvp */ + for (i = 0; i < context->stream_count; i++) { + stream = context->streams[i]; + + if (dc_state_can_clear_stream_cursor_subvp_limit(stream, context)) { + dc_state_set_stream_cursor_subvp_limit(stream, context, false); + } + } if (dc->debug.using_dml2) - out = dml2_validate(dc, context, + status = dml2_validate(dc, context, context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2, - fast_validate); + fast_validate) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; else - out = dml1_validate(dc, context, fast_validate); - return out; + status = dml1_validate(dc, context, fast_validate) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; + + if (!fast_validate && status == DC_OK && dc_state_is_subvp_in_use(context)) { + /* check new stream configuration still supports cursor if subvp used */ + for (i = 0; i < context->stream_count; i++) { + stream = context->streams[i]; + + if (dc_state_get_stream_subvp_type(context, stream) != SUBVP_PHANTOM && + stream->cursor_position.enable && + !dc_stream_check_cursor_attributes(stream, context, &stream->cursor_attributes)) { + /* hw cursor cannot be supported with subvp active, so disable subvp for now */ + dc_state_set_stream_cursor_subvp_limit(stream, context, true); + status = DC_FAIL_HW_CURSOR_SUPPORT; + } + }; + } + + if (!fast_validate && status == DC_FAIL_HW_CURSOR_SUPPORT) { + /* attempt to validate again with subvp disabled due to cursor */ + if (dc->debug.using_dml2) + status = dml2_validate(dc, context, + context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2, + fast_validate) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; + else + status = dml1_validate(dc, context, fast_validate) ? 
DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; + } + + return status; } int dcn32_populate_dml_pipes_from_context( @@ -2042,6 +2080,18 @@ static void dcn32_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw DC_FP_END(); } +unsigned int dcn32_get_max_hw_cursor_size(const struct dc *dc, + struct dc_state *state, + const struct dc_stream_state *stream) +{ + bool limit_cur_to_buf; + + limit_cur_to_buf = dc_state_get_stream_subvp_cursor_limit(stream, state) && + !stream->hw_cursor_req; + + return limit_cur_to_buf ? dc->caps.max_buffered_cursor_size : dc->caps.max_cursor_size; +} + static struct resource_funcs dcn32_res_pool_funcs = { .destroy = dcn32_destroy_resource_pool, .link_enc_create = dcn32_link_encoder_create, @@ -2067,7 +2117,8 @@ static struct resource_funcs dcn32_res_pool_funcs = { .add_phantom_pipes = dcn32_add_phantom_pipes, .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, + .get_max_hw_cursor_size = dcn32_get_max_hw_cursor_size, }; static uint32_t read_pipe_fuses(struct dc_context *ctx) @@ -2114,8 +2165,6 @@ static bool dcn32_resource_construct( #define REG_STRUCT dccg_regs dccg_regs_init(); - DC_FP_START(); - ctx->dc_bios->regs = &bios_regs; pool->base.res_cap = &res_cap_dcn32; @@ -2153,6 +2202,7 @@ static bool dcn32_resource_construct( dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a applied by default*/ /* TODO: Bring max_cursor_size back to 256 after subvp cursor corruption is fixed*/ dc->caps.max_cursor_size = 64; + dc->caps.max_buffered_cursor_size = 64; // sqrt(16 * 1024 / 4) dc->caps.min_horizontal_blanking_period = 80; dc->caps.dmdata_alloc_size = 2048; dc->caps.mall_size_per_mem_channel = 4; @@ -2501,14 +2551,10 @@ static bool dcn32_resource_construct( if (ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev) && (dc->config.sdpif_request_limit_words_per_umc == 0)) dc->config.sdpif_request_limit_words_per_umc = 16; - DC_FP_END(); - return true; create_fail: - DC_FP_END(); - dcn32_resource_destruct(pool); return false; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h index 1aa4ced29291..d60ed77eda80 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h @@ -98,7 +98,7 @@ void dcn32_add_phantom_pipes(struct dc *dc, unsigned int pipe_cnt, unsigned int index); -bool dcn32_validate_bandwidth(struct dc *dc, +enum dc_status dcn32_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); @@ -188,6 +188,10 @@ void dcn32_override_min_req_dcfclk(struct dc *dc, struct dc_state *context); unsigned int dcn32_calculate_mall_ways_from_bytes(const struct dc *dc, unsigned int total_size_in_mall_bytes); +unsigned int dcn32_get_max_hw_cursor_size(const struct dc *dc, + struct dc_state *state, + const struct dc_stream_state *stream); + /* definitions for run time init of reg offsets */ /* CLK SRC */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 38d76434683e..7db1f7a5613f 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1624,7 +1624,8 @@ static struct resource_funcs dcn321_res_pool_funcs = { 
.add_phantom_pipes = dcn32_add_phantom_pipes, .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, + .get_max_hw_cursor_size = dcn32_get_max_hw_cursor_size, }; static uint32_t read_pipe_fuses(struct dc_context *ctx) @@ -1709,6 +1710,7 @@ static bool dcn321_resource_construct( dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a applied by default*/ /* TODO: Bring max cursor size back to 256 after subvp cursor corruption is fixed*/ dc->caps.max_cursor_size = 64; + dc->caps.max_buffered_cursor_size = 64; // sqrt(16 * 1024 / 4) dc->caps.min_horizontal_blanking_period = 80; dc->caps.dmdata_alloc_size = 2048; dc->caps.mall_size_per_mem_channel = 4; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index ffd2b816cd02..72c6cf047db0 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -1732,7 +1732,7 @@ static void dcn35_get_panel_config_defaults(struct dc_panel_config *panel_config } -static bool dcn35_validate_bandwidth(struct dc *dc, +static enum dc_status dcn35_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { @@ -1743,13 +1743,13 @@ static bool dcn35_validate_bandwidth(struct dc *dc, fast_validate); if (fast_validate) - return out; + return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; DC_FP_START(); dcn35_decide_zstate_support(dc, context); DC_FP_END(); - return out; + return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } enum dc_status dcn35_patch_unknown_plane_state(struct dc_plane_state *plane_state) @@ -1903,7 +1903,7 @@ static bool dcn35_resource_construct( dc->caps.max_disp_clock_khz_at_vmin = 650000; /* Sequential ONO is based on ASIC. */ - if (dc->ctx->asic_id.hw_internal_rev > 0x10) + if (dc->ctx->asic_id.hw_internal_rev >= 0x40) dc->caps.sequential_ono = true; /* Use pipe context based otg sync logic */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 98f5bc1b929e..989a270f7dea 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -1712,7 +1712,7 @@ static void dcn35_get_panel_config_defaults(struct dc_panel_config *panel_config } -static bool dcn351_validate_bandwidth(struct dc *dc, +static enum dc_status dcn351_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { @@ -1723,13 +1723,13 @@ static bool dcn351_validate_bandwidth(struct dc *dc, fast_validate); if (fast_validate) - return out; + return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; DC_FP_START(); dcn35_decide_zstate_support(dc, context); DC_FP_END(); - return out; + return out ? 
DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } static struct resource_funcs dcn351_res_pool_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c index b6468573dc33..48e1f234185f 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c @@ -1713,7 +1713,7 @@ static void dcn35_get_panel_config_defaults(struct dc_panel_config *panel_config } -static bool dcn35_validate_bandwidth(struct dc *dc, +static enum dc_status dcn35_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { @@ -1724,13 +1724,13 @@ static bool dcn35_validate_bandwidth(struct dc *dc, fast_validate); if (fast_validate) - return out; + return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; DC_FP_START(); dcn35_decide_zstate_support(dc, context); DC_FP_END(); - return out; + return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } @@ -1876,7 +1876,7 @@ static bool dcn36_resource_construct( dc->caps.max_disp_clock_khz_at_vmin = 650000; /* Sequential ONO is based on ASIC. */ - if (dc->ctx->asic_id.hw_internal_rev > 0x10) + if (dc->ctx->asic_id.hw_internal_rev >= 0x40) dc->caps.sequential_ono = true; /* Use pipe context based otg sync logic */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index 7436dfbdf927..e0e32975ca34 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -1642,16 +1642,52 @@ enum dc_status dcn401_patch_unknown_plane_state(struct dc_plane_state *plane_sta return DC_OK; } -bool dcn401_validate_bandwidth(struct dc *dc, +enum dc_status dcn401_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { - bool out = false; + unsigned int i; + enum dc_status status = DC_OK; + const struct dc_stream_state *stream; + + /* reset cursor limitations on subvp */ + for (i = 0; i < context->stream_count; i++) { + stream = context->streams[i]; + + if (dc_state_can_clear_stream_cursor_subvp_limit(stream, context)) { + dc_state_set_stream_cursor_subvp_limit(stream, context, false); + } + } + if (dc->debug.using_dml2) - out = dml2_validate(dc, context, + status = dml2_validate(dc, context, context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2, - fast_validate); - return out; + fast_validate) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; + + if (!fast_validate && status == DC_OK && dc_state_is_subvp_in_use(context)) { + /* check new stream configuration still supports cursor if subvp used */ + for (i = 0; i < context->stream_count; i++) { + stream = context->streams[i]; + + if (dc_state_get_stream_subvp_type(context, stream) != SUBVP_PHANTOM && + stream->cursor_position.enable && + !dc_stream_check_cursor_attributes(stream, context, &stream->cursor_attributes)) { + /* hw cursor cannot be supported with subvp active, so disable subvp for now */ + dc_state_set_stream_cursor_subvp_limit(stream, context, true); + status = DC_FAIL_HW_CURSOR_SUPPORT; + } + }; + } + + if (!fast_validate && status == DC_FAIL_HW_CURSOR_SUPPORT) { + /* attempt to validate again with subvp disabled due to cursor */ + if (dc->debug.using_dml2) + status = dml2_validate(dc, context, + context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2, + fast_validate) ? 
DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; + } + + return status; } void dcn401_prepare_mcache_programming(struct dc *dc, @@ -1770,7 +1806,8 @@ static struct resource_funcs dcn401_res_pool_funcs = { .build_pipe_pix_clk_params = dcn401_build_pipe_pix_clk_params, .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, .get_power_profile = dcn401_get_power_profile, - .get_vstartup_for_pipe = dcn401_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn401_get_vstartup_for_pipe, + .get_max_hw_cursor_size = dcn32_get_max_hw_cursor_size }; static uint32_t read_pipe_fuses(struct dc_context *ctx) @@ -1846,8 +1883,9 @@ static bool dcn401_resource_construct( dc->caps.max_downscale_ratio = 600; dc->caps.i2c_speed_in_khz = 95; dc->caps.i2c_speed_in_khz_hdcp = 95; /*1.4 w/a applied by default*/ - /* TODO: Bring max cursor size back to 256 after subvp cursor corruption is fixed*/ + /* used to set cursor pitch, so must be aligned to power of 2 (HW actually supported 78x78) */ dc->caps.max_cursor_size = 64; + dc->caps.max_buffered_cursor_size = 64; dc->caps.cursor_not_scaled = true; dc->caps.min_horizontal_blanking_period = 80; dc->caps.dmdata_alloc_size = 2048; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index 4c259745d519..dc52a30991af 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -22,7 +22,7 @@ struct resource_pool *dcn401_create_resource_pool( enum dc_status dcn401_patch_unknown_plane_state(struct dc_plane_state *plane_state); -bool dcn401_validate_bandwidth(struct dc *dc, +enum dc_status dcn401_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c index 28348734d900..002eb926cca4 100644 --- a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c @@ -776,7 +776,7 @@ static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in, * Do not bypass UV at 1:1 for cositing to be applied */ if (!enable_isharp) { - if (data->ratios.horz.value == one && data->ratios.vert.value == one) + if (data->ratios.horz.value == one && data->ratios.vert.value == one && !spl_in->basic_out.always_scale) return SCL_MODE_SCALING_420_LUMA_BYPASS; } @@ -884,7 +884,7 @@ static bool spl_get_isharp_en(struct spl_in *spl_in, /* Calculate number of tap with adaptive scaling off */ static void spl_get_taps_non_adaptive_scaler( - struct spl_scratch *spl_scratch, const struct spl_taps *in_taps) + struct spl_scratch *spl_scratch, const struct spl_taps *in_taps, bool always_scale) { bool check_max_downscale = false; @@ -944,15 +944,15 @@ static void spl_get_taps_non_adaptive_scaler( spl_fixpt_from_fraction(6, 1)); SPL_ASSERT(check_max_downscale); - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz)) + + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz) && !always_scale) spl_scratch->scl_data.taps.h_taps = 1; - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert)) + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert) && !always_scale) spl_scratch->scl_data.taps.v_taps = 1; - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c)) + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c) && !always_scale) spl_scratch->scl_data.taps.h_taps_c = 1; - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c)) + if 
(IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c) && !always_scale) spl_scratch->scl_data.taps.v_taps_c = 1; - } /* Calculate optimal number of taps */ @@ -965,13 +965,15 @@ static bool spl_get_optimal_number_of_taps( unsigned int max_taps_y, max_taps_c; unsigned int min_taps_y, min_taps_c; enum lb_memory_config lb_config; - bool skip_easf = false; + bool skip_easf = false; + bool always_scale = spl_in->basic_out.always_scale; bool is_subsampled = spl_is_subsampled_format(spl_in->basic_in.format); + if (spl_scratch->scl_data.viewport.width > spl_scratch->scl_data.h_active && max_downscale_src_width != 0 && spl_scratch->scl_data.viewport.width > max_downscale_src_width) { - spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps); + spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps, always_scale); *enable_easf_v = false; *enable_easf_h = false; *enable_isharp = false; @@ -980,7 +982,7 @@ static bool spl_get_optimal_number_of_taps( /* Disable adaptive scaler and sharpener when integer scaling is enabled */ if (spl_in->scaling_quality.integer_scaling) { - spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps); + spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps, always_scale); *enable_easf_v = false; *enable_easf_h = false; *enable_isharp = false; @@ -996,7 +998,7 @@ static bool spl_get_optimal_number_of_taps( * taps = 4 for upscaling */ if (skip_easf) - spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps); + spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps, always_scale); else { if (spl_is_video_format(spl_in->basic_in.format)) { spl_scratch->scl_data.taps.h_taps = 6; @@ -1898,3 +1900,4 @@ bool SPL_NAMESPACE(spl_get_number_of_taps(struct spl_in *spl_in, struct spl_out spl_set_taps_data(dscl_prog_data, data); return res; } + diff --git a/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c index 52d97918a3bd..ebf0287417e0 100644 --- a/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c @@ -29,8 +29,6 @@ static inline unsigned long long spl_complete_integer_division_u64( { unsigned long long result; - SPL_ASSERT(divisor); - result = spl_div64_u64_rem(dividend, divisor, remainder); return result; @@ -196,8 +194,6 @@ struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg) * Good idea to use Newton's method */ - SPL_ASSERT(arg.value); - return spl_fixpt_from_fraction( spl_fixpt_one.value, arg.value); diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 4e0efff92dca..e759ce6ca700 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -51,8 +51,8 @@ * for the cache windows. * * The call to dmub_srv_hw_init() programs the DMCUB registers to prepare - * for command submission. Commands can be queued via dmub_srv_cmd_queue() - * and executed via dmub_srv_cmd_execute(). + * for command submission. Commands can be queued via dmub_srv_fb_cmd_queue() + * and executed via dmub_srv_fb_cmd_execute(). * * If the queue is full the dmub_srv_wait_for_idle() call can be used to * wait until the queue has been cleared. 
@@ -142,6 +142,7 @@ enum dmub_notification_type { DMUB_NOTIFICATION_SET_CONFIG_REPLY, DMUB_NOTIFICATION_DPIA_NOTIFICATION, DMUB_NOTIFICATION_HPD_SENSE_NOTIFY, + DMUB_NOTIFICATION_FUSED_IO, DMUB_NOTIFICATION_MAX }; @@ -170,6 +171,13 @@ enum dmub_srv_power_state_type { DMUB_POWER_STATE_D3 = 8 }; +/* enum dmub_inbox_cmd_interface type - defines default interface for host->dmub commands */ +enum dmub_inbox_cmd_interface_type { + DMUB_CMD_INTERFACE_DEFAULT = 0, + DMUB_CMD_INTERFACE_FB = 1, + DMUB_CMD_INTERFACE_REG = 2, +}; + /** * struct dmub_region - dmub hw memory region * @base: base address for region, must be 256 byte aligned @@ -349,6 +357,21 @@ struct dmub_diagnostic_data { uint8_t is_cw6_enabled : 1; }; +struct dmub_srv_inbox { + /* generic status */ + uint64_t num_submitted; + uint64_t num_reported; + union { + /* frame buffer mailbox status */ + struct dmub_rb rb; + /* register mailbox status */ + struct { + bool is_pending; + bool is_multi_pending; + }; + }; +}; + /** * struct dmub_srv_base_funcs - Driver specific base callbacks */ @@ -462,18 +485,21 @@ struct dmub_srv_hw_funcs { void (*init_reg_offsets)(struct dmub_srv *dmub, struct dc_context *ctx); void (*subvp_save_surf_addr)(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index); + void (*send_reg_inbox0_cmd_msg)(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); uint32_t (*read_reg_inbox0_rsp_int_status)(struct dmub_srv *dmub); void (*read_reg_inbox0_cmd_rsp)(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); void (*write_reg_inbox0_rsp_int_ack)(struct dmub_srv *dmub); + void (*clear_reg_inbox0_rsp_int_ack)(struct dmub_srv *dmub); + void (*enable_reg_inbox0_rsp_int)(struct dmub_srv *dmub, bool enable); + uint32_t (*read_reg_outbox0_rdy_int_status)(struct dmub_srv *dmub); void (*write_reg_outbox0_rdy_int_ack)(struct dmub_srv *dmub); void (*read_reg_outbox0_msg)(struct dmub_srv *dmub, uint32_t *msg); void (*write_reg_outbox0_rsp)(struct dmub_srv *dmub, uint32_t *rsp); uint32_t (*read_reg_outbox0_rsp_int_status)(struct dmub_srv *dmub); - void (*enable_reg_inbox0_rsp_int)(struct dmub_srv *dmub, bool enable); void (*enable_reg_outbox0_rdy_int)(struct dmub_srv *dmub, bool enable); }; @@ -493,6 +519,7 @@ struct dmub_srv_create_params { enum dmub_asic asic; uint32_t fw_version; bool is_virtual; + enum dmub_inbox_cmd_interface_type inbox_type; }; /** @@ -521,8 +548,9 @@ struct dmub_srv { const struct dmub_srv_dcn401_regs *regs_dcn401; struct dmub_srv_base_funcs funcs; struct dmub_srv_hw_funcs hw_funcs; - struct dmub_rb inbox1_rb; + struct dmub_srv_inbox inbox1; uint32_t inbox1_last_wptr; + struct dmub_srv_inbox reg_inbox0; /** * outbox1_rb is accessed without locks (dal & dc) * and to be used only in dmub_srv_stat_get_notification() @@ -542,6 +570,7 @@ struct dmub_srv { struct dmub_fw_meta_info meta_info; struct dmub_feature_caps feature_caps; struct dmub_visual_confirm_color visual_confirm_color; + enum dmub_inbox_cmd_interface_type inbox_type; enum dmub_srv_power_state_type power_state; struct dmub_diagnostic_data debug; @@ -566,11 +595,8 @@ struct dmub_notification { struct aux_reply_data aux_reply; enum dp_hpd_status hpd_status; enum set_config_status sc_status; - /** - * DPIA notification command. 
- */ - struct dmub_rb_cmd_dpia_notification dpia_notification; struct dmub_rb_cmd_hpd_sense_notify_data hpd_sense_notify; + struct dmub_cmd_fused_request fused_request; }; }; @@ -699,19 +725,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub); /** - * dmub_srv_sync_inbox1() - sync sw state with hw state - * @dmub: the dmub service - * - * Sync sw state with hw state when resume from S0i3 - * - * Return: - * DMUB_STATUS_OK - success - * DMUB_STATUS_INVALID - unspecified error - */ -enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub); - -/** - * dmub_srv_cmd_queue() - queues a command to the DMUB + * dmub_srv_fb_cmd_queue() - queues a command to the DMUB * @dmub: the dmub service * @cmd: the command to queue * @@ -723,11 +737,11 @@ enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub); * DMUB_STATUS_QUEUE_FULL - no remaining room in queue * DMUB_STATUS_INVALID - unspecified error */ -enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, +enum dmub_status dmub_srv_fb_cmd_queue(struct dmub_srv *dmub, const union dmub_rb_cmd *cmd); /** - * dmub_srv_cmd_execute() - Executes a queued sequence to the dmub + * dmub_srv_fb_cmd_execute() - Executes a queued sequence to the dmub * @dmub: the dmub service * * Begins execution of queued commands on the dmub. @@ -736,7 +750,7 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, * DMUB_STATUS_OK - success * DMUB_STATUS_INVALID - unspecified error */ -enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub); +enum dmub_status dmub_srv_fb_cmd_execute(struct dmub_srv *dmub); /** * dmub_srv_wait_for_hw_pwr_up() - Waits for firmware hardware power up is completed @@ -795,6 +809,23 @@ enum dmub_status dmub_srv_wait_for_phy_init(struct dmub_srv *dmub, uint32_t timeout_us); /** + * dmub_srv_wait_for_pending() - Re-entrant wait for messages currently pending + * @dmub: the dmub service + * @timeout_us: the maximum number of microseconds to wait + * + * Waits until the commands queued prior to this call are complete. + * If interfaces remain busy due to additional work being submitted + * concurrently, this function will not continue to wait. + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_TIMEOUT - wait for buffer to flush timed out + * DMUB_STATUS_INVALID - unspecified error + */ +enum dmub_status dmub_srv_wait_for_pending(struct dmub_srv *dmub, + uint32_t timeout_us); + +/** * dmub_srv_wait_for_idle() - Waits for the DMUB to be idle * @dmub: the dmub service * @timeout_us: the maximum number of microseconds to wait @@ -892,9 +923,6 @@ enum dmub_status dmub_srv_get_fw_boot_status(struct dmub_srv *dmub, enum dmub_status dmub_srv_get_fw_boot_option(struct dmub_srv *dmub, union dmub_fw_boot_options *option); -enum dmub_status dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub, - union dmub_rb_cmd *cmd); - enum dmub_status dmub_srv_set_skip_panel_power_sequence(struct dmub_srv *dmub, bool skip); @@ -959,26 +987,6 @@ enum dmub_status dmub_srv_clear_inbox0_ack(struct dmub_srv *dmub); void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index); /** - * dmub_srv_send_reg_inbox0_cmd() - send a dmub command and wait for the command - * being processed by DMUB. - * @dmub: The dmub service - * @cmd: The dmub command being sent. If with_replay is true, the function will - * update cmd with replied data. - * @with_reply: true if DMUB reply needs to be copied back to cmd. 
false if the - * cmd doesn't need to be replied. - * @timeout_us: timeout in microseconds. - * - * Return: - * DMUB_STATUS_OK - success - * DMUB_STATUS_TIMEOUT - DMUB fails to process the command within the timeout - * interval. - */ -enum dmub_status dmub_srv_send_reg_inbox0_cmd( - struct dmub_srv *dmub, - union dmub_rb_cmd *cmd, - bool with_reply, uint32_t timeout_us); - -/** * dmub_srv_set_power_state() - Track DC power state in dmub_srv * @dmub: The dmub service * @power_state: DC power state setting @@ -990,4 +998,59 @@ enum dmub_status dmub_srv_send_reg_inbox0_cmd( */ void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state); +/** + * dmub_srv_reg_cmd_execute() - Executes provided command to the dmub + * @dmub: the dmub service + * @cmd: the command packet to be executed + * + * Executes a single command for the dmub. + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_INVALID - unspecified error + */ +enum dmub_status dmub_srv_reg_cmd_execute(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); + + +/** + * dmub_srv_cmd_get_response() - Copies return data for command into buffer + * @dmub: the dmub service + * @cmd_rsp: response buffer + * + * Copies return data for command into buffer + */ +void dmub_srv_cmd_get_response(struct dmub_srv *dmub, + union dmub_rb_cmd *cmd_rsp); + +/** + * dmub_srv_sync_inboxes() - Sync inbox state + * @dmub: the dmub service + * + * Sync inbox state + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_INVALID - unspecified error + */ +enum dmub_status dmub_srv_sync_inboxes(struct dmub_srv *dmub); + +/** + * dmub_srv_wait_for_inbox_free() - Waits for space in the DMUB inbox to free up + * @dmub: the dmub service + * @timeout_us: the maximum number of microseconds to wait + * @num_free_required: number of free entries required + * + * Waits until the DMUB buffer is freed to the specified number. + * The maximum wait time is given in microseconds to prevent spinning + * forever. + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_TIMEOUT - wait for buffer to flush timed out + * DMUB_STATUS_INVALID - unspecified error + */ +enum dmub_status dmub_srv_wait_for_inbox_free(struct dmub_srv *dmub, + uint32_t timeout_us, + uint32_t num_free_required); + #endif /* _DMUB_SRV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 1f5f4e3e49d4..76e9dcc15466 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -36,6 +36,9 @@ //<DMUB_TYPES>================================================================== /* Basic type definitions. */ +#ifdef __forceinline +#undef __forceinline +#endif #define __forceinline inline /** @@ -2636,7 +2639,11 @@ enum dp_hpd_type { /** * DP HPD short pulse */ - DP_IRQ + DP_IRQ = 1, + /** + * Failure to acquire DP HPD state + */ + DP_NONE_HPD = 2 }; /** @@ -4360,6 +4367,11 @@ enum dmub_cmd_abm_type { * Get the current ACE curve. */ DMUB_CMD__ABM_GET_ACE_CURVE = 10, + + /** + * Get current histogram data + */ + DMUB_CMD__ABM_GET_HISTOGRAM_DATA = 11, }; struct abm_ace_curve { @@ -4954,6 +4966,20 @@ enum dmub_abm_ace_curve_type { }; /** + * enum dmub_abm_histogram_type - Histogram type. + */ +enum dmub_abm_histogram_type { + /** + * ACE curve as defined by the SW layer. + */ + ABM_HISTOGRAM_TYPE__SW = 0, + /** + * ACE curve as defined by the SW to HW translation interface layer. 
+ */ + ABM_HISTOGRAM_TYPE__SW_IF = 1, +}; + +/** * Definition of a DMUB_CMD__ABM_GET_ACE_CURVE command. */ struct dmub_rb_cmd_abm_get_ace_curve { @@ -4989,6 +5015,41 @@ struct dmub_rb_cmd_abm_get_ace_curve { }; /** + * Definition of a DMUB_CMD__ABM_GET_HISTOGRAM command. + */ +struct dmub_rb_cmd_abm_get_histogram { + /** + * Command header. + */ + struct dmub_cmd_header header; + + /** + * Address where Histogram should be copied. + */ + union dmub_addr dest; + + /** + * Type of Histogram being queried. + */ + enum dmub_abm_histogram_type histogram_type; + + /** + * Indirect buffer length. + */ + uint16_t bytes; + + /** + * eDP panel instance. + */ + uint8_t panel_inst; + + /** + * Explicit padding to 4 byte boundary. + */ + uint8_t pad; +}; + +/** * Definition of a DMUB_CMD__ABM_SAVE_RESTORE command. */ struct dmub_rb_cmd_abm_save_restore { @@ -5389,7 +5450,8 @@ struct dmub_cmd_fused_request { struct dmub_cmd_fused_request_location_i2c { uint8_t is_aux : 1; // False uint8_t ddc_line : 3; - uint8_t _reserved0 : 4; + uint8_t over_aux : 1; + uint8_t _reserved0 : 3; uint8_t address; uint8_t offset; uint8_t length; @@ -5687,6 +5749,11 @@ union dmub_rb_cmd { struct dmub_rb_cmd_abm_get_ace_curve abm_get_ace_curve; /** + * Definition of a DMUB_CMD__ABM_GET_HISTOGRAM command. + */ + struct dmub_rb_cmd_abm_get_histogram abm_get_histogram; + + /** * Definition of a DMUB_CMD__ABM_SET_EVENT command. */ struct dmub_rb_cmd_abm_set_event abm_set_event; @@ -5934,6 +6001,9 @@ static inline uint32_t dmub_rb_num_free(struct dmub_rb *rb) else data_count = rb->capacity - (rb->rptr - rb->wrpt); + /* +1 because 1 entry is always unusable */ + data_count += DMUB_RB_CMD_SIZE; + return (rb->capacity - data_count) / DMUB_RB_CMD_SIZE; } @@ -5953,6 +6023,7 @@ static inline bool dmub_rb_full(struct dmub_rb *rb) else data_count = rb->capacity - (rb->rptr - rb->wrpt); + /* -1 because 1 entry is always unusable */ return (data_count == (rb->capacity - DMUB_RB_CMD_SIZE)); } diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c index e67f7c4784eb..731ca9b6a6cf 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c @@ -517,28 +517,69 @@ void dmub_dcn401_send_reg_inbox0_cmd_msg(struct dmub_srv *dmub, union dmub_rb_cmd *cmd) { uint32_t *dwords = (uint32_t *)cmd; - + int32_t payload_size_bytes = cmd->cmd_common.header.payload_bytes; + uint32_t msg_index; static_assert(sizeof(*cmd) == 64, "DMUB command size mismatch"); - REG_WRITE(DMCUB_REG_INBOX0_MSG0, dwords[0]); - REG_WRITE(DMCUB_REG_INBOX0_MSG1, dwords[1]); - REG_WRITE(DMCUB_REG_INBOX0_MSG2, dwords[2]); - REG_WRITE(DMCUB_REG_INBOX0_MSG3, dwords[3]); - REG_WRITE(DMCUB_REG_INBOX0_MSG4, dwords[4]); - REG_WRITE(DMCUB_REG_INBOX0_MSG5, dwords[5]); - REG_WRITE(DMCUB_REG_INBOX0_MSG6, dwords[6]); - REG_WRITE(DMCUB_REG_INBOX0_MSG7, dwords[7]); - REG_WRITE(DMCUB_REG_INBOX0_MSG8, dwords[8]); - REG_WRITE(DMCUB_REG_INBOX0_MSG9, dwords[9]); - REG_WRITE(DMCUB_REG_INBOX0_MSG10, dwords[10]); - REG_WRITE(DMCUB_REG_INBOX0_MSG11, dwords[11]); - REG_WRITE(DMCUB_REG_INBOX0_MSG12, dwords[12]); - REG_WRITE(DMCUB_REG_INBOX0_MSG13, dwords[13]); - REG_WRITE(DMCUB_REG_INBOX0_MSG14, dwords[14]); + /* read remaining data based on payload size */ + for (msg_index = 0; msg_index < 15; msg_index++) { + if (payload_size_bytes <= msg_index * 4) { + break; + } + + switch (msg_index) { + case 0: + REG_WRITE(DMCUB_REG_INBOX0_MSG0, dwords[msg_index + 1]); + break; + case 1: + 
REG_WRITE(DMCUB_REG_INBOX0_MSG1, dwords[msg_index + 1]); + break; + case 2: + REG_WRITE(DMCUB_REG_INBOX0_MSG2, dwords[msg_index + 1]); + break; + case 3: + REG_WRITE(DMCUB_REG_INBOX0_MSG3, dwords[msg_index + 1]); + break; + case 4: + REG_WRITE(DMCUB_REG_INBOX0_MSG4, dwords[msg_index + 1]); + break; + case 5: + REG_WRITE(DMCUB_REG_INBOX0_MSG5, dwords[msg_index + 1]); + break; + case 6: + REG_WRITE(DMCUB_REG_INBOX0_MSG6, dwords[msg_index + 1]); + break; + case 7: + REG_WRITE(DMCUB_REG_INBOX0_MSG7, dwords[msg_index + 1]); + break; + case 8: + REG_WRITE(DMCUB_REG_INBOX0_MSG8, dwords[msg_index + 1]); + break; + case 9: + REG_WRITE(DMCUB_REG_INBOX0_MSG9, dwords[msg_index + 1]); + break; + case 10: + REG_WRITE(DMCUB_REG_INBOX0_MSG10, dwords[msg_index + 1]); + break; + case 11: + REG_WRITE(DMCUB_REG_INBOX0_MSG11, dwords[msg_index + 1]); + break; + case 12: + REG_WRITE(DMCUB_REG_INBOX0_MSG12, dwords[msg_index + 1]); + break; + case 13: + REG_WRITE(DMCUB_REG_INBOX0_MSG13, dwords[msg_index + 1]); + break; + case 14: + REG_WRITE(DMCUB_REG_INBOX0_MSG14, dwords[msg_index + 1]); + break; + } + } + /* writing to INBOX RDY register will trigger DMUB REG INBOX0 RDY * interrupt. */ - REG_WRITE(DMCUB_REG_INBOX0_RDY, dwords[15]); + REG_WRITE(DMCUB_REG_INBOX0_RDY, dwords[0]); } uint32_t dmub_dcn401_read_reg_inbox0_rsp_int_status(struct dmub_srv *dmub) @@ -556,30 +597,39 @@ void dmub_dcn401_read_reg_inbox0_cmd_rsp(struct dmub_srv *dmub, static_assert(sizeof(*cmd) == 64, "DMUB command size mismatch"); - dwords[0] = REG_READ(DMCUB_REG_INBOX0_MSG0); - dwords[1] = REG_READ(DMCUB_REG_INBOX0_MSG1); - dwords[2] = REG_READ(DMCUB_REG_INBOX0_MSG2); - dwords[3] = REG_READ(DMCUB_REG_INBOX0_MSG3); - dwords[4] = REG_READ(DMCUB_REG_INBOX0_MSG4); - dwords[5] = REG_READ(DMCUB_REG_INBOX0_MSG5); - dwords[6] = REG_READ(DMCUB_REG_INBOX0_MSG6); - dwords[7] = REG_READ(DMCUB_REG_INBOX0_MSG7); - dwords[8] = REG_READ(DMCUB_REG_INBOX0_MSG8); - dwords[9] = REG_READ(DMCUB_REG_INBOX0_MSG9); - dwords[10] = REG_READ(DMCUB_REG_INBOX0_MSG10); - dwords[11] = REG_READ(DMCUB_REG_INBOX0_MSG11); - dwords[12] = REG_READ(DMCUB_REG_INBOX0_MSG12); - dwords[13] = REG_READ(DMCUB_REG_INBOX0_MSG13); - dwords[14] = REG_READ(DMCUB_REG_INBOX0_MSG14); - dwords[15] = REG_READ(DMCUB_REG_INBOX0_RSP); + dwords[0] = REG_READ(DMCUB_REG_INBOX0_RSP); + dwords[1] = REG_READ(DMCUB_REG_INBOX0_MSG0); + dwords[2] = REG_READ(DMCUB_REG_INBOX0_MSG1); + dwords[3] = REG_READ(DMCUB_REG_INBOX0_MSG2); + dwords[4] = REG_READ(DMCUB_REG_INBOX0_MSG3); + dwords[5] = REG_READ(DMCUB_REG_INBOX0_MSG4); + dwords[6] = REG_READ(DMCUB_REG_INBOX0_MSG5); + dwords[7] = REG_READ(DMCUB_REG_INBOX0_MSG6); + dwords[8] = REG_READ(DMCUB_REG_INBOX0_MSG7); + dwords[9] = REG_READ(DMCUB_REG_INBOX0_MSG8); + dwords[10] = REG_READ(DMCUB_REG_INBOX0_MSG9); + dwords[11] = REG_READ(DMCUB_REG_INBOX0_MSG10); + dwords[12] = REG_READ(DMCUB_REG_INBOX0_MSG11); + dwords[13] = REG_READ(DMCUB_REG_INBOX0_MSG12); + dwords[14] = REG_READ(DMCUB_REG_INBOX0_MSG13); + dwords[15] = REG_READ(DMCUB_REG_INBOX0_MSG14); } void dmub_dcn401_write_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub) { REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_ACK, 1); +} + +void dmub_dcn401_clear_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub) +{ REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_ACK, 0); } +void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable) +{ + REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_EN, enable ? 
1:0); +} + void dmub_dcn401_write_reg_outbox0_rdy_int_ack(struct dmub_srv *dmub) { REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_ACK, 1); @@ -604,11 +654,6 @@ uint32_t dmub_dcn401_read_reg_outbox0_rsp_int_status(struct dmub_srv *dmub) return status; } -void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable) -{ - REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_EN, enable ? 1:0); -} - void dmub_dcn401_enable_reg_outbox0_rdy_int(struct dmub_srv *dmub, bool enable) { REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_EN, enable ? 1:0); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h index c35be52676f6..88c3a44d67d9 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h @@ -277,11 +277,13 @@ uint32_t dmub_dcn401_read_reg_inbox0_rsp_int_status(struct dmub_srv *dmub); void dmub_dcn401_read_reg_inbox0_cmd_rsp(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); void dmub_dcn401_write_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub); +void dmub_dcn401_clear_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub); +void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable); + void dmub_dcn401_write_reg_outbox0_rdy_int_ack(struct dmub_srv *dmub); void dmub_dcn401_read_reg_outbox0_msg(struct dmub_srv *dmub, uint32_t *msg); void dmub_dcn401_write_reg_outbox0_rsp(struct dmub_srv *dmub, uint32_t *msg); uint32_t dmub_dcn401_read_reg_outbox0_rsp_int_status(struct dmub_srv *dmub); -void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable); void dmub_dcn401_enable_reg_outbox0_rdy_int(struct dmub_srv *dmub, bool enable); uint32_t dmub_dcn401_read_reg_outbox0_rdy_int_status(struct dmub_srv *dmub); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index ae8133816b43..c917a70b3c19 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -157,6 +157,9 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) { struct dmub_srv_hw_funcs *funcs = &dmub->hw_funcs; + /* default to specifying now inbox type */ + enum dmub_inbox_cmd_interface_type default_inbox_type = DMUB_CMD_INTERFACE_DEFAULT; + switch (asic) { case DMUB_ASIC_DCN20: case DMUB_ASIC_DCN21: @@ -395,10 +398,15 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) funcs->get_current_time = dmub_dcn401_get_current_time; funcs->get_diagnostic_data = dmub_dcn401_get_diagnostic_data; + funcs->send_reg_inbox0_cmd_msg = dmub_dcn401_send_reg_inbox0_cmd_msg; funcs->read_reg_inbox0_rsp_int_status = dmub_dcn401_read_reg_inbox0_rsp_int_status; funcs->read_reg_inbox0_cmd_rsp = dmub_dcn401_read_reg_inbox0_cmd_rsp; funcs->write_reg_inbox0_rsp_int_ack = dmub_dcn401_write_reg_inbox0_rsp_int_ack; + funcs->clear_reg_inbox0_rsp_int_ack = dmub_dcn401_clear_reg_inbox0_rsp_int_ack; + funcs->enable_reg_inbox0_rsp_int = dmub_dcn401_enable_reg_inbox0_rsp_int; + default_inbox_type = DMUB_CMD_INTERFACE_FB; // still default to FB for now + funcs->write_reg_outbox0_rdy_int_ack = dmub_dcn401_write_reg_outbox0_rdy_int_ack; funcs->read_reg_outbox0_msg = dmub_dcn401_read_reg_outbox0_msg; funcs->write_reg_outbox0_rsp = dmub_dcn401_write_reg_outbox0_rsp; @@ -411,6 +419,20 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) return false; } + /* set default inbox type if not overriden */ + if 
(dmub->inbox_type == DMUB_CMD_INTERFACE_DEFAULT) { + if (default_inbox_type != DMUB_CMD_INTERFACE_DEFAULT) { + /* use default inbox type as specified by DCN rev */ + dmub->inbox_type = default_inbox_type; + } else if (funcs->send_reg_inbox0_cmd_msg) { + /* prefer reg as default inbox type if present */ + dmub->inbox_type = DMUB_CMD_INTERFACE_REG; + } else { + /* use fb as fallback */ + dmub->inbox_type = DMUB_CMD_INTERFACE_FB; + } + } + return true; } @@ -426,6 +448,7 @@ enum dmub_status dmub_srv_create(struct dmub_srv *dmub, dmub->asic = params->asic; dmub->fw_version = params->fw_version; dmub->is_virtual = params->is_virtual; + dmub->inbox_type = params->inbox_type; /* Setup asic dependent hardware funcs. */ if (!dmub_srv_hw_setup(dmub, params->asic)) { @@ -695,7 +718,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, inbox1.base = cw4.region.base; inbox1.top = cw4.region.base + DMUB_RB_SIZE; outbox1.base = inbox1.top; - outbox1.top = cw4.region.top; + outbox1.top = inbox1.top + DMUB_RB_SIZE; cw5.offset.quad_part = tracebuff_fb->gpu_addr; cw5.region.base = DMUB_CW5_BASE; @@ -737,7 +760,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, rb_params.ctx = dmub; rb_params.base_address = mail_fb->cpu_addr; rb_params.capacity = DMUB_RB_SIZE; - dmub_rb_init(&dmub->inbox1_rb, &rb_params); + dmub_rb_init(&dmub->inbox1.rb, &rb_params); // Initialize outbox1 ring buffer rb_params.ctx = dmub; @@ -768,27 +791,6 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, return DMUB_STATUS_OK; } -enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub) -{ - if (!dmub->sw_init) - return DMUB_STATUS_INVALID; - - if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) { - uint32_t rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); - uint32_t wptr = dmub->hw_funcs.get_inbox1_wptr(dmub); - - if (rptr > dmub->inbox1_rb.capacity || wptr > dmub->inbox1_rb.capacity) { - return DMUB_STATUS_HW_FAILURE; - } else { - dmub->inbox1_rb.rptr = rptr; - dmub->inbox1_rb.wrpt = wptr; - dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt; - } - } - - return DMUB_STATUS_OK; -} - enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub) { if (!dmub->sw_init) @@ -799,8 +801,13 @@ enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub) /* mailboxes have been reset in hw, so reset the sw state as well */ dmub->inbox1_last_wptr = 0; - dmub->inbox1_rb.wrpt = 0; - dmub->inbox1_rb.rptr = 0; + dmub->inbox1.rb.wrpt = 0; + dmub->inbox1.rb.rptr = 0; + dmub->inbox1.num_reported = 0; + dmub->inbox1.num_submitted = 0; + dmub->reg_inbox0.num_reported = 0; + dmub->reg_inbox0.num_submitted = 0; + dmub->reg_inbox0.is_pending = 0; dmub->outbox0_rb.wrpt = 0; dmub->outbox0_rb.rptr = 0; dmub->outbox1_rb.wrpt = 0; @@ -811,7 +818,7 @@ enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub) return DMUB_STATUS_OK; } -enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, +enum dmub_status dmub_srv_fb_cmd_queue(struct dmub_srv *dmub, const union dmub_rb_cmd *cmd) { if (!dmub->hw_init) @@ -820,18 +827,20 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, if (dmub->power_state != DMUB_POWER_STATE_D0) return DMUB_STATUS_POWER_STATE_D3; - if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity || - dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) { + if (dmub->inbox1.rb.rptr > dmub->inbox1.rb.capacity || + dmub->inbox1.rb.wrpt > dmub->inbox1.rb.capacity) { return DMUB_STATUS_HW_FAILURE; } - if (dmub_rb_push_front(&dmub->inbox1_rb, cmd)) + if (dmub_rb_push_front(&dmub->inbox1.rb, cmd)) { + 
dmub->inbox1.num_submitted++; return DMUB_STATUS_OK; + } return DMUB_STATUS_QUEUE_FULL; } -enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) +enum dmub_status dmub_srv_fb_cmd_execute(struct dmub_srv *dmub) { struct dmub_rb flush_rb; @@ -846,13 +855,13 @@ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) * been flushed to framebuffer memory. Otherwise DMCUB might * read back stale, fully invalid or partially invalid data. */ - flush_rb = dmub->inbox1_rb; + flush_rb = dmub->inbox1.rb; flush_rb.rptr = dmub->inbox1_last_wptr; dmub_rb_flush_pending(&flush_rb); - dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1_rb.wrpt); + dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1.rb.wrpt); - dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt; + dmub->inbox1_last_wptr = dmub->inbox1.rb.wrpt; return DMUB_STATUS_OK; } @@ -910,26 +919,110 @@ enum dmub_status dmub_srv_wait_for_auto_load(struct dmub_srv *dmub, return DMUB_STATUS_TIMEOUT; } +static void dmub_srv_update_reg_inbox0_status(struct dmub_srv *dmub) +{ + if (dmub->reg_inbox0.is_pending) { + dmub->reg_inbox0.is_pending = dmub->hw_funcs.read_reg_inbox0_rsp_int_status && + !dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); + + if (!dmub->reg_inbox0.is_pending) { + /* ack the rsp interrupt */ + if (dmub->hw_funcs.write_reg_inbox0_rsp_int_ack) + dmub->hw_funcs.write_reg_inbox0_rsp_int_ack(dmub); + + /* only update the reported count if commands aren't being batched */ + if (!dmub->reg_inbox0.is_pending && !dmub->reg_inbox0.is_multi_pending) { + dmub->reg_inbox0.num_reported = dmub->reg_inbox0.num_submitted; + } + } + } +} + +enum dmub_status dmub_srv_wait_for_pending(struct dmub_srv *dmub, + uint32_t timeout_us) +{ + uint32_t i; + const uint32_t polling_interval_us = 1; + struct dmub_srv_inbox scratch_reg_inbox0 = dmub->reg_inbox0; + struct dmub_srv_inbox scratch_inbox1 = dmub->inbox1; + const volatile struct dmub_srv_inbox *reg_inbox0 = &dmub->reg_inbox0; + const volatile struct dmub_srv_inbox *inbox1 = &dmub->inbox1; + + if (!dmub->hw_init || + !dmub->hw_funcs.get_inbox1_wptr) + return DMUB_STATUS_INVALID; + + /* take a snapshot of the required mailbox state */ + scratch_inbox1.rb.wrpt = dmub->hw_funcs.get_inbox1_wptr(dmub); + + for (i = 0; i <= timeout_us; i += polling_interval_us) { + scratch_inbox1.rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); + + scratch_reg_inbox0.is_pending = scratch_reg_inbox0.is_pending && + dmub->hw_funcs.read_reg_inbox0_rsp_int_status && + !dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); + + if (scratch_inbox1.rb.rptr > dmub->inbox1.rb.capacity) + return DMUB_STATUS_HW_FAILURE; + + /* check current HW state first, but use command submission vs reported as a fallback */ + if ((dmub_rb_empty(&scratch_inbox1.rb) || + inbox1->num_reported >= scratch_inbox1.num_submitted) && + (!scratch_reg_inbox0.is_pending || + reg_inbox0->num_reported >= scratch_reg_inbox0.num_submitted)) + return DMUB_STATUS_OK; + + udelay(polling_interval_us); + } + + return DMUB_STATUS_TIMEOUT; +} + +static enum dmub_status dmub_srv_update_inbox_status(struct dmub_srv *dmub) +{ + uint32_t rptr; + + /* update inbox1 state */ + rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); + + if (rptr > dmub->inbox1.rb.capacity) + return DMUB_STATUS_HW_FAILURE; + + if (dmub->inbox1.rb.rptr > rptr) { + /* rb wrapped */ + dmub->inbox1.num_reported += (rptr + dmub->inbox1.rb.capacity - dmub->inbox1.rb.rptr) / DMUB_RB_CMD_SIZE; + } else { + dmub->inbox1.num_reported += (rptr - dmub->inbox1.rb.rptr) / DMUB_RB_CMD_SIZE; + } + dmub->inbox1.rb.rptr = rptr; + 
+ /* update reg_inbox0 */ + dmub_srv_update_reg_inbox0_status(dmub); + + return DMUB_STATUS_OK; +} + enum dmub_status dmub_srv_wait_for_idle(struct dmub_srv *dmub, uint32_t timeout_us) { - uint32_t i, rptr; + enum dmub_status status; + uint32_t i; + const uint32_t polling_interval_us = 1; if (!dmub->hw_init) return DMUB_STATUS_INVALID; - for (i = 0; i <= timeout_us; ++i) { - rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); - - if (rptr > dmub->inbox1_rb.capacity) - return DMUB_STATUS_HW_FAILURE; + for (i = 0; i < timeout_us; i += polling_interval_us) { + status = dmub_srv_update_inbox_status(dmub); - dmub->inbox1_rb.rptr = rptr; + if (status != DMUB_STATUS_OK) + return status; - if (dmub_rb_empty(&dmub->inbox1_rb)) + /* check for idle */ + if (dmub_rb_empty(&dmub->inbox1.rb) && !dmub->reg_inbox0.is_pending) return DMUB_STATUS_OK; - udelay(1); + udelay(polling_interval_us); } return DMUB_STATUS_TIMEOUT; @@ -1040,35 +1133,6 @@ enum dmub_status dmub_srv_set_skip_panel_power_sequence(struct dmub_srv *dmub, return DMUB_STATUS_OK; } -enum dmub_status dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub, - union dmub_rb_cmd *cmd) -{ - enum dmub_status status = DMUB_STATUS_OK; - - // Queue command - status = dmub_srv_cmd_queue(dmub, cmd); - - if (status != DMUB_STATUS_OK) - return status; - - // Execute command - status = dmub_srv_cmd_execute(dmub); - - if (status != DMUB_STATUS_OK) - return status; - - // Wait for DMUB to process command - status = dmub_srv_wait_for_idle(dmub, 100000); - - if (status != DMUB_STATUS_OK) - return status; - - // Copy data back from ring buffer into command - dmub_rb_get_return_data(&dmub->inbox1_rb, cmd); - - return status; -} - static inline bool dmub_rb_out_trace_buffer_front(struct dmub_rb *rb, void *entry) { @@ -1160,47 +1224,132 @@ void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_ } } +void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state) +{ + if (!dmub || !dmub->hw_init) + return; + + dmub->power_state = dmub_srv_power_state; +} -enum dmub_status dmub_srv_send_reg_inbox0_cmd( - struct dmub_srv *dmub, - union dmub_rb_cmd *cmd, - bool with_reply, uint32_t timeout_us) +enum dmub_status dmub_srv_reg_cmd_execute(struct dmub_srv *dmub, union dmub_rb_cmd *cmd) { - uint32_t rsp_ready = 0; - uint32_t i; + uint32_t num_pending = 0; + + if (!dmub->hw_init) + return DMUB_STATUS_INVALID; + + if (dmub->power_state != DMUB_POWER_STATE_D0) + return DMUB_STATUS_POWER_STATE_D3; + + if (!dmub->hw_funcs.send_reg_inbox0_cmd_msg || + !dmub->hw_funcs.clear_reg_inbox0_rsp_int_ack) + return DMUB_STATUS_INVALID; + if (dmub->reg_inbox0.num_submitted >= dmub->reg_inbox0.num_reported) + num_pending = dmub->reg_inbox0.num_submitted - dmub->reg_inbox0.num_reported; + else + /* num_submitted wrapped */ + num_pending = DMUB_REG_INBOX0_RB_MAX_ENTRY - + (dmub->reg_inbox0.num_reported - dmub->reg_inbox0.num_submitted); + + if (num_pending >= DMUB_REG_INBOX0_RB_MAX_ENTRY) + return DMUB_STATUS_QUEUE_FULL; + + /* clear last rsp ack and send message */ + dmub->hw_funcs.clear_reg_inbox0_rsp_int_ack(dmub); dmub->hw_funcs.send_reg_inbox0_cmd_msg(dmub, cmd); - for (i = 0; i < timeout_us; i++) { - rsp_ready = dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); - if (rsp_ready) - break; - udelay(1); + dmub->reg_inbox0.num_submitted++; + dmub->reg_inbox0.is_pending = true; + dmub->reg_inbox0.is_multi_pending = cmd->cmd_common.header.multi_cmd_pending; + + return DMUB_STATUS_OK; +} + +void dmub_srv_cmd_get_response(struct dmub_srv 
*dmub, + union dmub_rb_cmd *cmd_rsp) +{ + if (dmub) { + if (dmub->inbox_type == DMUB_CMD_INTERFACE_REG && + dmub->hw_funcs.read_reg_inbox0_cmd_rsp) { + dmub->hw_funcs.read_reg_inbox0_cmd_rsp(dmub, cmd_rsp); + } else { + dmub_rb_get_return_data(&dmub->inbox1.rb, cmd_rsp); + } } - if (rsp_ready == 0) - return DMUB_STATUS_TIMEOUT; +} - if (with_reply) - dmub->hw_funcs.read_reg_inbox0_cmd_rsp(dmub, cmd); +static enum dmub_status dmub_srv_sync_reg_inbox0(struct dmub_srv *dmub) +{ + if (!dmub || !dmub->sw_init) + return DMUB_STATUS_INVALID; - dmub->hw_funcs.write_reg_inbox0_rsp_int_ack(dmub); + dmub->reg_inbox0.is_pending = 0; + dmub->reg_inbox0.is_multi_pending = 0; - /* wait for rsp int status is cleared to initial state before exit */ - for (; i <= timeout_us; i++) { - rsp_ready = dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); - if (rsp_ready == 0) - break; - udelay(1); + return DMUB_STATUS_OK; +} + +static enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub) +{ + if (!dmub->sw_init) + return DMUB_STATUS_INVALID; + + if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) { + uint32_t rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); + uint32_t wptr = dmub->hw_funcs.get_inbox1_wptr(dmub); + + if (rptr > dmub->inbox1.rb.capacity || wptr > dmub->inbox1.rb.capacity) { + return DMUB_STATUS_HW_FAILURE; + } else { + dmub->inbox1.rb.rptr = rptr; + dmub->inbox1.rb.wrpt = wptr; + dmub->inbox1_last_wptr = dmub->inbox1.rb.wrpt; + } } - ASSERT(rsp_ready == 0); return DMUB_STATUS_OK; } -void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state) +enum dmub_status dmub_srv_sync_inboxes(struct dmub_srv *dmub) { - if (!dmub || !dmub->hw_init) - return; + enum dmub_status status; - dmub->power_state = dmub_srv_power_state; + status = dmub_srv_sync_reg_inbox0(dmub); + if (status != DMUB_STATUS_OK) + return status; + + status = dmub_srv_sync_inbox1(dmub); + if (status != DMUB_STATUS_OK) + return status; + + return DMUB_STATUS_OK; +} + +enum dmub_status dmub_srv_wait_for_inbox_free(struct dmub_srv *dmub, + uint32_t timeout_us, + uint32_t num_free_required) +{ + enum dmub_status status; + uint32_t i; + const uint32_t polling_interval_us = 1; + + if (!dmub->hw_init) + return DMUB_STATUS_INVALID; + + for (i = 0; i < timeout_us; i += polling_interval_us) { + status = dmub_srv_update_inbox_status(dmub); + + if (status != DMUB_STATUS_OK) + return status; + + /* check for space in inbox1 */ + if (dmub_rb_num_free(&dmub->inbox1.rb) >= num_free_required) + return DMUB_STATUS_OK; + + udelay(polling_interval_us); + } + + return DMUB_STATUS_TIMEOUT; } diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c index cce887cefc01..567c5b1aeb7a 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c @@ -95,23 +95,6 @@ enum dmub_status dmub_srv_stat_get_notification(struct dmub_srv *dmub, case DMUB_OUT_CMD__DPIA_NOTIFICATION: notify->type = DMUB_NOTIFICATION_DPIA_NOTIFICATION; notify->link_index = cmd.dpia_notification.payload.header.instance; - - if (cmd.dpia_notification.payload.header.type == DPIA_NOTIFY__BW_ALLOCATION) { - - notify->dpia_notification.payload.data.dpia_bw_alloc.estimated_bw = - cmd.dpia_notification.payload.data.dpia_bw_alloc.estimated_bw; - notify->dpia_notification.payload.data.dpia_bw_alloc.allocated_bw = - cmd.dpia_notification.payload.data.dpia_bw_alloc.allocated_bw; - - if 
(cmd.dpia_notification.payload.data.dpia_bw_alloc.bits.bw_request_failed) - notify->result = DPIA_BW_REQ_FAILED; - else if (cmd.dpia_notification.payload.data.dpia_bw_alloc.bits.bw_request_succeeded) - notify->result = DPIA_BW_REQ_SUCCESS; - else if (cmd.dpia_notification.payload.data.dpia_bw_alloc.bits.est_bw_changed) - notify->result = DPIA_EST_BW_CHANGED; - else if (cmd.dpia_notification.payload.data.dpia_bw_alloc.bits.bw_alloc_cap_changed) - notify->result = DPIA_BW_ALLOC_CAPS_CHANGED; - } break; case DMUB_OUT_CMD__HPD_SENSE_NOTIFY: notify->type = DMUB_NOTIFICATION_HPD_SENSE_NOTIFY; @@ -119,6 +102,10 @@ enum dmub_status dmub_srv_stat_get_notification(struct dmub_srv *dmub, &cmd.hpd_sense_notify.data, sizeof(cmd.hpd_sense_notify.data)); break; + case DMUB_OUT_CMD__FUSED_IO: + notify->type = DMUB_NOTIFICATION_FUSED_IO; + dmub_memcpy(¬ify->fused_request, &cmd.fused_io.request, sizeof(cmd.fused_io.request)); + break; default: notify->type = DMUB_NOTIFICATION_NO_DATA; break; diff --git a/drivers/gpu/drm/amd/display/include/gpio_service_interface.h b/drivers/gpu/drm/amd/display/include/gpio_service_interface.h index 7e3240e73c1f..63813009a3a6 100644 --- a/drivers/gpu/drm/amd/display/include/gpio_service_interface.h +++ b/drivers/gpu/drm/amd/display/include/gpio_service_interface.h @@ -86,6 +86,9 @@ enum dc_irq_source dal_irq_get_source( enum dc_irq_source dal_irq_get_rx_source( const struct gpio *irq); +enum dc_irq_source dal_irq_get_read_request( + const struct gpio *irq); + enum gpio_result dal_irq_setup_hpd_filter( struct gpio *irq, struct gpio_hpd_config *config); diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h index 55c7d873175f..a37634942b07 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h @@ -386,6 +386,7 @@ enum mod_hdcp_status mod_hdcp_write_repeater_auth_ack(struct mod_hdcp *hdcp); enum mod_hdcp_status mod_hdcp_write_stream_manage(struct mod_hdcp *hdcp); enum mod_hdcp_status mod_hdcp_write_content_type(struct mod_hdcp *hdcp); enum mod_hdcp_status mod_hdcp_clear_cp_irq_status(struct mod_hdcp *hdcp); +enum mod_hdcp_status mod_hdcp_write_poll_read_lc_fw(struct mod_hdcp *hdcp); /* hdcp version helpers */ static inline uint8_t is_dp_hdcp(struct mod_hdcp *hdcp) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c index 1d41dd58f6bc..bb8ae80b37f8 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c @@ -452,21 +452,12 @@ out: return status; } -static enum mod_hdcp_status locality_check(struct mod_hdcp *hdcp, +static enum mod_hdcp_status locality_check_sw(struct mod_hdcp *hdcp, struct mod_hdcp_event_context *event_ctx, struct mod_hdcp_transition_input_hdcp2 *input) { enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; - if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) { - event_ctx->unexpected_event = 1; - goto out; - } - - if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_prepare_lc_init, - &input->lc_init_prepare, &status, - hdcp, "lc_init_prepare")) - goto out; if (!mod_hdcp_execute_and_set(mod_hdcp_write_lc_init, &input->lc_init_write, &status, hdcp, "lc_init_write")) @@ -482,6 +473,48 @@ static enum mod_hdcp_status locality_check(struct mod_hdcp *hdcp, &input->l_prime_read, &status, hdcp, "l_prime_read")) goto out; +out: + return status; +} + +static enum mod_hdcp_status locality_check_fw(struct 
mod_hdcp *hdcp, + struct mod_hdcp_event_context *event_ctx, + struct mod_hdcp_transition_input_hdcp2 *input) +{ + enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; + + if (!mod_hdcp_execute_and_set(mod_hdcp_write_poll_read_lc_fw, + &input->l_prime_read, &status, + hdcp, "l_prime_read")) + goto out; + +out: + return status; +} + +static enum mod_hdcp_status locality_check(struct mod_hdcp *hdcp, + struct mod_hdcp_event_context *event_ctx, + struct mod_hdcp_transition_input_hdcp2 *input) +{ + enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; + const bool use_fw = hdcp->config.ddc.funcs.atomic_write_poll_read_i2c + && hdcp->config.ddc.funcs.atomic_write_poll_read_aux + && !hdcp->connection.link.adjust.hdcp2.force_sw_locality_check; + + if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) { + event_ctx->unexpected_event = 1; + goto out; + } + + if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_prepare_lc_init, + &input->lc_init_prepare, &status, + hdcp, "lc_init_prepare")) + goto out; + + status = (use_fw ? locality_check_fw : locality_check_sw)(hdcp, event_ctx, input); + if (status != MOD_HDCP_STATUS_SUCCESS) + goto out; + if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_validate_l_prime, &input->l_prime_validation, &status, hdcp, "l_prime_validation")) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c index c5f6c11de7e5..89ffb89e1932 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c @@ -184,17 +184,28 @@ enum mod_hdcp_status mod_hdcp_hdcp2_transition(struct mod_hdcp *hdcp, callback_in_ms(0, output); set_state_id(hdcp, output, H2_A2_LOCALITY_CHECK); break; - case H2_A2_LOCALITY_CHECK: + case H2_A2_LOCALITY_CHECK: { + const bool use_fw = hdcp->config.ddc.funcs.atomic_write_poll_read_i2c + && !adjust->hdcp2.force_sw_locality_check; + + /* + * 1A-05: consider disconnection after LC init a failure + * 1A-13-1: consider invalid l' a failure + * 1A-13-2: consider l' timeout a failure + */ if (hdcp->state.stay_count > 10 || input->lc_init_prepare != PASS || - input->lc_init_write != PASS || - input->l_prime_available_poll != PASS || - input->l_prime_read != PASS) { - /* - * 1A-05: consider disconnection after LC init a failure - * 1A-13-1: consider invalid l' a failure - * 1A-13-2: consider l' timeout a failure - */ + (!use_fw && input->lc_init_write != PASS) || + (!use_fw && input->l_prime_available_poll != PASS)) { + fail_and_restart_in_ms(0, &status, output); + break; + } else if (input->l_prime_read != PASS) { + if (use_fw && hdcp->config.debug.lc_enable_sw_fallback) { + adjust->hdcp2.force_sw_locality_check = true; + callback_in_ms(0, output); + break; + } + fail_and_restart_in_ms(0, &status, output); break; } else if (input->l_prime_validation != PASS) { @@ -205,6 +216,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_transition(struct mod_hdcp *hdcp, callback_in_ms(0, output); set_state_id(hdcp, output, H2_A3_EXCHANGE_KS_AND_TEST_FOR_REPEATER); break; + } case H2_A3_EXCHANGE_KS_AND_TEST_FOR_REPEATER: if (input->eks_prepare != PASS || input->eks_write != PASS) { @@ -498,14 +510,25 @@ enum mod_hdcp_status mod_hdcp_hdcp2_dp_transition(struct mod_hdcp *hdcp, callback_in_ms(0, output); set_state_id(hdcp, output, D2_A2_LOCALITY_CHECK); break; - case D2_A2_LOCALITY_CHECK: + case D2_A2_LOCALITY_CHECK: { + const bool use_fw = hdcp->config.ddc.funcs.atomic_write_poll_read_aux + && !adjust->hdcp2.force_sw_locality_check; + if 
(hdcp->state.stay_count > 10 || input->lc_init_prepare != PASS || - input->lc_init_write != PASS || - input->l_prime_read != PASS) { + (!use_fw && input->lc_init_write != PASS)) { /* 1A-12: consider invalid l' a failure */ fail_and_restart_in_ms(0, &status, output); break; + } else if (input->l_prime_read != PASS) { + if (use_fw && hdcp->config.debug.lc_enable_sw_fallback) { + adjust->hdcp2.force_sw_locality_check = true; + callback_in_ms(0, output); + break; + } + + fail_and_restart_in_ms(0, &status, output); + break; } else if (input->l_prime_validation != PASS) { callback_in_ms(0, output); increment_stay_counter(hdcp); @@ -514,6 +537,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_dp_transition(struct mod_hdcp *hdcp, callback_in_ms(0, output); set_state_id(hdcp, output, D2_A34_EXCHANGE_KS_AND_TEST_FOR_REPEATER); break; + } case D2_A34_EXCHANGE_KS_AND_TEST_FOR_REPEATER: if (input->eks_prepare != PASS || input->eks_write != PASS) { diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c index 6e064e6ae949..2e6408579194 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c @@ -688,3 +688,76 @@ enum mod_hdcp_status mod_hdcp_clear_cp_irq_status(struct mod_hdcp *hdcp) return MOD_HDCP_STATUS_INVALID_OPERATION; } + +static bool write_stall_read_lc_fw_aux(struct mod_hdcp *hdcp) +{ + struct mod_hdcp_message_hdcp2 *hdcp2 = &hdcp->auth.msg.hdcp2; + + struct mod_hdcp_atomic_op_aux write = { + hdcp_dpcd_addrs[MOD_HDCP_MESSAGE_ID_WRITE_LC_INIT], + hdcp2->lc_init + 1, + sizeof(hdcp2->lc_init) - 1, + }; + struct mod_hdcp_atomic_op_aux stall = { 0, NULL, 0, }; + struct mod_hdcp_atomic_op_aux read = { + hdcp_dpcd_addrs[MOD_HDCP_MESSAGE_ID_READ_LC_SEND_L_PRIME], + hdcp2->lc_l_prime + 1, + sizeof(hdcp2->lc_l_prime) - 1, + }; + + hdcp2->lc_l_prime[0] = HDCP_2_2_LC_SEND_LPRIME; + + return hdcp->config.ddc.funcs.atomic_write_poll_read_aux( + hdcp->config.ddc.handle, + &write, + &stall, + &read, + 16 * 1000, + 0 + ); +} + +static bool write_poll_read_lc_fw_i2c(struct mod_hdcp *hdcp) +{ + struct mod_hdcp_message_hdcp2 *hdcp2 = &hdcp->auth.msg.hdcp2; + uint8_t expected_rxstatus[2] = { sizeof(hdcp2->lc_l_prime) }; + + hdcp->buf[0] = hdcp_i2c_offsets[MOD_HDCP_MESSAGE_ID_WRITE_LC_INIT]; + memmove(&hdcp->buf[1], hdcp2->lc_init, sizeof(hdcp2->lc_init)); + + struct mod_hdcp_atomic_op_i2c write = { + HDCP_I2C_ADDR, + 0, + hdcp->buf, + sizeof(hdcp2->lc_init) + 1, + }; + struct mod_hdcp_atomic_op_i2c poll = { + HDCP_I2C_ADDR, + hdcp_i2c_offsets[MOD_HDCP_MESSAGE_ID_READ_RXSTATUS], + expected_rxstatus, + sizeof(expected_rxstatus), + }; + struct mod_hdcp_atomic_op_i2c read = { + HDCP_I2C_ADDR, + hdcp_i2c_offsets[MOD_HDCP_MESSAGE_ID_READ_LC_SEND_L_PRIME], + hdcp2->lc_l_prime, + sizeof(hdcp2->lc_l_prime), + }; + + return hdcp->config.ddc.funcs.atomic_write_poll_read_i2c( + hdcp->config.ddc.handle, + &write, + &poll, + &read, + 20 * 1000, + 6 + ); +} + +enum mod_hdcp_status mod_hdcp_write_poll_read_lc_fw(struct mod_hdcp *hdcp) +{ + const bool success = (is_dp_hdcp(hdcp) ? write_stall_read_lc_fw_aux : write_poll_read_lc_fw_i2c)(hdcp); + + return success ? 
MOD_HDCP_STATUS_SUCCESS : MOD_HDCP_STATUS_DDC_FAILURE; +} + diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h index a4d344a4db9e..c42468bb70ac 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h @@ -133,9 +133,22 @@ enum mod_hdcp_display_disable_option { MOD_HDCP_DISPLAY_DISABLE_ENCRYPTION, }; +struct mod_hdcp_atomic_op_i2c { + uint8_t address; + uint8_t offset; + uint8_t *data; + uint32_t size; +}; + +struct mod_hdcp_atomic_op_aux { + uint32_t address; + uint8_t *data; + uint32_t size; +}; + struct mod_hdcp_ddc { void *handle; - struct { + struct mod_hdcp_ddc_funcs { bool (*read_i2c)(void *handle, uint32_t address, uint8_t offset, @@ -153,6 +166,22 @@ struct mod_hdcp_ddc { uint32_t address, const uint8_t *data, uint32_t size); + bool (*atomic_write_poll_read_i2c)( + void *handle, + const struct mod_hdcp_atomic_op_i2c *write, + const struct mod_hdcp_atomic_op_i2c *poll, + struct mod_hdcp_atomic_op_i2c *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb + ); + bool (*atomic_write_poll_read_aux)( + void *handle, + const struct mod_hdcp_atomic_op_aux *write, + const struct mod_hdcp_atomic_op_aux *poll, + struct mod_hdcp_atomic_op_aux *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb + ); } funcs; }; @@ -185,7 +214,8 @@ struct mod_hdcp_link_adjustment_hdcp2 { uint8_t force_type : 2; uint8_t force_no_stored_km : 1; uint8_t increase_h_prime_timeout: 1; - uint8_t reserved : 3; + uint8_t force_sw_locality_check : 1; + uint8_t reserved : 2; }; struct mod_hdcp_link_adjustment { @@ -272,6 +302,10 @@ struct mod_hdcp_display_query { struct mod_hdcp_config { struct mod_hdcp_psp psp; struct mod_hdcp_ddc ddc; + struct { + uint8_t lc_enable_sw_fallback : 1; + uint8_t reserved : 7; + } debug; uint8_t index; }; diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 4c95b885d1d0..c8eccee9b023 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -366,7 +366,7 @@ enum DC_DEBUG_MASK { DC_HDCP_LC_FORCE_FW_ENABLE = 0x80000, /** - * @DC_HDCP_LC_ENABLE_SW_FALLBACK If set, upon HDCP Locality Check FW + * @DC_HDCP_LC_ENABLE_SW_FALLBACK: If set, upon HDCP Locality Check FW * path failure, retry using legacy SW path. 
*/ DC_HDCP_LC_ENABLE_SW_FALLBACK = 0x100000, diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h index bd8085ec54ed..2d6a598a6c25 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h @@ -5242,6 +5242,8 @@ #define DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT 0x0000000c #define DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE_MASK 0x00000003L #define DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT 0x00000000 +#define DEGAMMA_CONTROL__ICON_DEGAMMA_MODE_MASK 0x00000300L +#define DEGAMMA_CONTROL__ICON_DEGAMMA_MODE__SHIFT 0x00000008 #define DEGAMMA_CONTROL__OVL_DEGAMMA_MODE_MASK 0x00000030L #define DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT 0x00000004 #define DENORM_CONTROL__DENORM_MODE_MASK 0x00000007L diff --git a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h index c75aee25619e..6f44345277af 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h @@ -1779,6 +1779,8 @@ #define mmRLC_TTOP_D 0x3105 #define mmRLC_CLEAR_STATE_RESTORE_BASE 0x30C8 #define mmRLC_PG_AO_CU_MASK 0x310B +#define mmSPI_STATIC_THREAD_MGMT_1 0x2438 +#define mmSPI_STATIC_THREAD_MGMT_2 0x2439 #define mmSPI_STATIC_THREAD_MGMT_3 0x243A #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h index edc8a793a95d..4dd386b98748 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h @@ -234,6 +234,26 @@ #define mmIH_RB_WPTR_ADDR_HI 0x0F84 #define mmIH_RB_WPTR_ADDR_LO 0x0F85 #define mmIH_STATUS 0x0F88 + +#define mmDMA_GFX_RB_CNTL 0x3400 +#define mmDMA_GFX_RB_BASE 0x3401 +#define mmDMA_GFX_RB_RPTR 0x3402 +#define mmDMA_GFX_RB_WPTR 0x3403 +#define mmDMA_GFX_RB_RPTR_ADDR_HI 0x3407 +#define mmDMA_GFX_RB_RPTR_ADDR_LO 0x3408 +#define mmDMA_GFX_IB_CNTL 0x3409 +#define mmDMA_GFX_IB_RPTR 0x340a +#define mmDMA_CNTL 0x340b +#define mmDMA_STATUS_REG 0x340D +#define mmDMA_TILING_CONFIG 0x342E +#define mmDMA_SEM_INCOMPLETE_TIMER_CNTL 0x3411 +#define mmDMA_SEM_WAIT_FAIL_TIMER_CNTL 0x3412 +#define mmDMA_POWER_CNTL 0x342F +#define mmDMA_CLK_CTRL 0x3430 +#define mmDMA_PG 0x3435 +#define mmDMA_PGFSM_CONFIG 0x3436 +#define mmDMA_PGFSM_WRITE 0x3437 + #define mmSEM_MAILBOX 0x0F9B #define mmSEM_MAILBOX_CLIENTCONFIG 0x0F9A #define mmSEM_MAILBOX_CONTROL 0x0F9C @@ -269,7 +289,4 @@ #define mmVCE_CONFIG 0x0F94 #define mmXDMA_MSTR_MEM_OVERFLOW_CNTL 0x03F8 -/* from the old sid.h */ -#define mmDMA_TILING_CONFIG 0x342E - #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_sh_mask.h index 1c540fe136cb..9f7fc2428b69 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_sh_mask.h @@ -823,6 +823,43 @@ #define LX3__RESERVED__SHIFT 0x00000000 #define RINGOSC_MASK__MASK_MASK 0x0000ffffL #define RINGOSC_MASK__MASK__SHIFT 0x00000000 + +#define DMA_CNTL__TRAP_ENABLE_MASK 0x00000001L +#define DMA_CNTL__TRAP_ENABLE__SHIFT 0x00000000 +#define DMA_CNTL__SEM_INCOMPLETE_INT_ENABLE_MASK 0x00000002L +#define DMA_CNTL__SEM_INCOMPLETE_INT_ENABLE__SHIFT 0x00000001 +#define DMA_CNTL__SEM_WAIT_INT_ENABLE_MASK 0x00000004L +#define DMA_CNTL__SEM_WAIT_INT_ENABLE__SHIFT 0x00000002 +#define DMA_CNTL__DATA_SWAP_ENABLE_MASK 0x00000008L 
+#define DMA_CNTL__DATA_SWAP_ENABLE__SHIFT 0x00000003 +#define DMA_CNTL__FENCE_SWAP_ENABLE_MASK 0x00000010L +#define DMA_CNTL__FENCE_SWAP_ENABLE__SHIFT 0x00000004 +#define DMA_CNTL__CTXEMPTY_INT_ENABLE_MASK 0x10000000L +#define DMA_CNTL__CTXEMPTY_INT_ENABLE__SHIFT 0x0000001C +#define DMA_GFX_RB_CNTL__RB_ENABLE_MASK 0x00000001L +#define DMA_GFX_RB_CNTL__RB_ENABLE__SHIFT 0x00000000 +#define DMA_GFX_RB_CNTL__RB_SIZE__SHIFT 0x00000001 +#define DMA_GFX_RB_CNTL__RB_SWAP_ENABLE_MASK 0x00000200L +#define DMA_GFX_RB_CNTL__RB_SWAP_ENABLE__SHIFT 0x00000009 +#define DMA_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK 0x00001000L +#define DMA_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT 0x0000000C +#define DMA_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE_MASK 0x00002000L +#define DMA_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE__SHIFT 0x0000000D +#define DMA_GFX_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT 0x00000010 +#define DMA_GFX_IB_CNTL__IB_ENABLE_MASK 0x00000001L +#define DMA_GFX_IB_CNTL__IB_ENABLE__SHIFT 0x00000000 +#define DMA_GFX_IB_CNTL__IB_SWAP_ENABLE_MASK 0x00000010L +#define DMA_GFX_IB_CNTL__IB_SWAP_ENABLE__SHIFT 0x00000004 +#define DMA_GFX_IB_CNTL__CMD_VMID_FORCE_MASK 0x80000000L +#define DMA_GFX_IB_CNTL__CMD_VMID_FORCE__SHIFT 0x0000001F + +#define DMA_STATUS_REG__IDLE_MASK 0x00000001L +#define DMA_STATUS_REG__IDLE__SHIFT 0x00000000 +#define DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK 0x00000100L +#define DMA_POWER_CNTL__MEM_POWER_OVERRIDE__SHIFT 0x00000008 +#define DMA_PG__PG_CNTL_ENABLE_MASK 0x00000001L +#define DMA_PG__PG_CNTL_ENABLE__SHIFT 0x00000000 + #define SEM_MAILBOX_CLIENTCONFIG__CP_CLIENT0_MASK 0x00000007L #define SEM_MAILBOX_CLIENTCONFIG__CP_CLIENT0__SHIFT 0x00000000 #define SEM_MAILBOX_CLIENTCONFIG__CP_CLIENT1_MASK 0x00000038L @@ -1015,6 +1052,10 @@ #define SRBM_STATUS2__VCE_BUSY__SHIFT 0x00000007 #define SRBM_STATUS2__VCE_RQ_PENDING_MASK 0x00000008L #define SRBM_STATUS2__VCE_RQ_PENDING__SHIFT 0x00000003 +#define SRBM_STATUS2__DMA_BUSY_MASK 0x00000020L +#define SRBM_STATUS2__DMA_BUSY__SHIFT 0x00000005 +#define SRBM_STATUS2__DMA1_BUSY_MASK 0x00000040L +#define SRBM_STATUS2__DMA1_BUSY__SHIFT 0x00000006 #define SRBM_STATUS2__XDMA_BUSY_MASK 0x00000100L #define SRBM_STATUS2__XDMA_BUSY__SHIFT 0x00000008 #define SRBM_STATUS2__XSP_BUSY_MASK 0x00000010L diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_d.h index 6b10be61efc3..bdef1f743df7 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_d.h @@ -41,7 +41,49 @@ #define ixLCAC_MC5_CNTL 0x012B #define ixLCAC_MC5_OVR_SEL 0x012C #define ixLCAC_MC5_OVR_VAL 0x012D + +#define mmCG_SPLL_FUNC_CNTL 0x0180 +#define mmCG_SPLL_FUNC_CNTL_2 0x0181 +#define mmCG_SPLL_FUNC_CNTL_3 0x0182 +#define mmCG_SPLL_FUNC_CNTL_4 0x0183 +#define mmCG_SPLL_STATUS 0x0185 +#define mmSPLL_CNTL_MODE 0x0186 +#define mmCG_SPLL_SPREAD_SPECTRUM 0x0188 +#define mmCG_SPLL_SPREAD_SPECTRUM_2 0x0189 +#define mmCG_SPLL_AUTOSCALE_CNTL 0x018B +#define mmMPLL_BYPASSCLK_SEL 0x0197 +#define mmCG_CLKPIN_CNTL 0x0198 +#define mmCG_CLKPIN_CNTL_2 0x0199 +#define mmTHM_CLK_CNTL 0x019B +#define mmMISC_CLK_CNTL 0x019C +#define mmCG_THERMAL_CTRL 0x01C0 +#define mmCG_THERMAL_STATUS 0x01C1 +#define mmCG_THERMAL_INT 0x01C2 +#define mmCG_MULT_THERMAL_CTRL 0x01C4 +#define mmCG_MULT_THERMAL_STATUS 0x01C5 +#define mmCG_FDO_CTRL0 0x01D5 +#define mmCG_FDO_CTRL1 0x01D6 +#define mmCG_FDO_CTRL2 0x01D7 +#define mmCG_TACH_CTRL 0x01DC +#define mmCG_TACH_STATUS 0x01DD +#define mmGENERAL_PWRMGT 0x1E0 
+#define mmCG_TPC 0x1E1 +#define mmSCLK_PWRMGT_CNTL 0x1E2 +#define mmTARGET_AND_CURRENT_PROFILE_INDEX 0x01E6 +#define mmCG_FTV 0x01EF +#define mmCG_FFCT_0 0x01F0 +#define mmCG_BSP 0x01FF +#define mmCG_AT 0x0200 +#define mmCG_GIT 0x0201 +#define mmCG_SSP 0x0203 +#define mmCG_DISPLAY_GAP_CNTL 0x020A +#define mmCG_ULV_CONTROL 0x021E +#define mmCG_ULV_PARAMETER 0x021F +#define mmSMC_SCRATCH0 0x0221 +#define mmCG_CAC_CTRL 0x022E + #define ixSMC_PC_C 0x80000370 + #define ixTHM_TMON0_DEBUG 0x03F0 #define ixTHM_TMON0_INT_DATA 0x0380 #define ixTHM_TMON0_RDIL0_DATA 0x0300 @@ -110,6 +152,7 @@ #define ixTHM_TMON1_RDIR7_DATA 0x0337 #define ixTHM_TMON1_RDIR8_DATA 0x0338 #define ixTHM_TMON1_RDIR9_DATA 0x0339 + #define mmGPIOPAD_A 0x05E7 #define mmGPIOPAD_EN 0x05E8 #define mmGPIOPAD_EXTERN_TRIG_CNTL 0x05F1 @@ -127,6 +170,7 @@ #define mmGPIOPAD_STRENGTH 0x05E5 #define mmGPIOPAD_SW_INT_STAT 0x05E4 #define mmGPIOPAD_Y 0x05E9 + #define mmSMC_IND_ACCESS_CNTL 0x008A #define mmSMC_IND_DATA_0 0x0081 #define mmSMC_IND_DATA 0x0081 diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_sh_mask.h index 7d3925b7266e..67d3c7e13a48 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_sh_mask.h @@ -23,10 +23,142 @@ #ifndef SMU_6_0_SH_MASK_H #define SMU_6_0_SH_MASK_H -#define CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV_MASK 0x03ffffffL -#define CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV__SHIFT 0x00000000 -#define CG_SPLL_FUNC_CNTL__SPLL_REF_DIV_MASK 0x000003f0L +#define CG_AT__CG_R_MASK 0x0000FFFFL +#define CG_AT__CG_R__SHIFT 0x00000000 +#define CG_AT__CG_L_MASK 0xFFFF0000L +#define CG_AT__CG_L__SHIFT 0x00000010 + +#define CG_BSP__BSP_MASK 0x0000FFFFL +#define CG_BSP__BSP__SHIFT 0x00000000 +#define CG_BSP__BSU_MASK 0x000F0000L +#define CG_BSP__BSU__SHIFT 0x00000010 + +#define CG_CAC_CTRL__CAC_WINDOW_MASK 0x00FFFFFFL +#define CG_CAC_CTRL__CAC_WINDOW__SHIFT 0x00000000 + +#define CG_CLKPIN_CNTL__XTALIN_DIVIDE_MASK 0x00000002L +#define CG_CLKPIN_CNTL__XTALIN_DIVIDE__SHIFT 0x00000001 +#define CG_CLKPIN_CNTL__BCLK_AS_XCLK_MASK 0x00000004L +#define CG_CLKPIN_CNTL__BCLK_AS_XCLK__SHIFT 0x00000002 +#define CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN_MASK 0x00000008L +#define CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN__SHIFT 0x00000003 +#define CG_CLKPIN_CNTL_2__MUX_TCLK_TO_XCLK_MASK 0x00000100L +#define CG_CLKPIN_CNTL_2__MUX_TCLK_TO_XCLK__SHIFT 0x00000008 + +#define CG_DISPLAY_GAP_CNTL__DISP1_GAP_MASK 0x00000003L +#define CG_DISPLAY_GAP_CNTL__DISP1_GAP__SHIFT 0x00000000 +#define CG_DISPLAY_GAP_CNTL__DISP2_GAP_MASK 0x0000000CL +#define CG_DISPLAY_GAP_CNTL__DISP2_GAP__SHIFT 0x00000002 +#define CG_DISPLAY_GAP_CNTL__VBI_TIMER_COUNT_MASK 0x0003FFF0L +#define CG_DISPLAY_GAP_CNTL__VBI_TIMER_COUNT__SHIFT 0x00000004 +#define CG_DISPLAY_GAP_CNTL__VBI_TIMER_UNIT_MASK 0x00700000 +#define CG_DISPLAY_GAP_CNTL__VBI_TIMER_UNIT__SHIFT 0x00000014 +#define CG_DISPLAY_GAP_CNTL__DISP1_GAP_MCHG_MASK 0x03000000L +#define CG_DISPLAY_GAP_CNTL__DISP1_GAP_MCHG__SHIFT 0x00000018 +#define CG_DISPLAY_GAP_CNTL__DISP2_GAP_MCHG_MASK 0x0C000000L +#define CG_DISPLAY_GAP_CNTL__DISP2_GAP_MCHG__SHIFT 0x0000001A + +#define CG_FFCT_0__UTC_0_MASK 0x000003FFL +#define CG_FFCT_0__UTC_0__SHIFT 0x00000000 +#define CG_FFCT_0__DTC_0_MASK 0x000FFC00L +#define CG_FFCT_0__DTC_0__SHIFT 0x0000000A + +#define CG_GIT__CG_GICST_MASK 0x0000FFFFL +#define CG_GIT__CG_GICST__SHIFT 0x00000000 +#define CG_GIT__CG_GIPOT_MASK 0xFFFF0000L +#define CG_GIT__CG_GIPOT__SHIFT 0x00000010 + 
+#define CG_SPLL_FUNC_CNTL__SPLL_RESET_MASK 0x00000001L +#define CG_SPLL_FUNC_CNTL__SPLL_RESET__SHIFT 0x00000000 +#define CG_SPLL_FUNC_CNTL__SPLL_SLEEP_MASK 0x00000002L +#define CG_SPLL_FUNC_CNTL__SPLL_SLEEP__SHIFT 0x00000001 +#define CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN_MASK 0x00000008L +#define CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN__SHIFT 0x00000003 +#define CG_SPLL_FUNC_CNTL__SPLL_REF_DIV_MASK 0x000003F0L #define CG_SPLL_FUNC_CNTL__SPLL_REF_DIV__SHIFT 0x00000004 +#define CG_SPLL_FUNC_CNTL__SPLL_PDIV_A_MASK 0x007F00000 +#define CG_SPLL_FUNC_CNTL__SPLL_PDIV_A__SHIFT 0x00000014 +#define CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL_MASK 0x0000001FF +#define CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL__SHIFT 0x00000000 +#define CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK 0x00800000 +#define CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG__SHIFT 0x00000017 +#define CG_SPLL_FUNC_CNTL_2__SCLK_MUX_UPDATE_MASK 0x04000000 +#define CG_SPLL_FUNC_CNTL_2__SCLK_MUX_UPDATE__SHIFT 0x0000001A +#define CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV_MASK 0x03FFFFFFL +#define CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV__SHIFT 0x00000000 +#define CG_SPLL_FUNC_CNTL_3__SPLL_DITHEN_MASK 0x10000000L +#define CG_SPLL_FUNC_CNTL_3__SPLL_DITHEN__SHIFT 0x0000001C +#define CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK 0x00000002L +#define CG_SPLL_STATUS__SPLL_CHG_STATUS__SHIFT 0x00000001 +#define CG_SPLL_SPREAD_SPECTRUM__SSEN_MASK 0x00000001L +#define CG_SPLL_SPREAD_SPECTRUM__SSEN__SHIFT 0x00000000 +#define CG_SPLL_SPREAD_SPECTRUM__CLK_S_MASK 0x0000FFF0L +#define CG_SPLL_SPREAD_SPECTRUM__CLK_S__SHIFT 0x00000004 +#define CG_SPLL_SPREAD_SPECTRUM_2__CLK_V_MASK 0x00000200L +#define CG_SPLL_SPREAD_SPECTRUM_2__CLK_V__SHIFT 0x00000000 +#define CG_SPLL_AUTOSCALE_CNTL__AUTOSCALE_ON_SS_CLEAR_MASK 0x03FFFFFFL +#define CG_SPLL_AUTOSCALE_CNTL__AUTOSCALE_ON_SS_CLEAR__SHIFT 0x00000009 + +#define CG_SSP__SST_MASK 0x0000FFFFL +#define CG_SSP__SST__SHIFT 0x00000000 +#define CG_SSP__SSTU_MASK 0x000F0000L +#define CG_SSP__SSTU__SHIFT 0x00000010 + +#define CG_THERMAL_CTRL__DPM_EVENT_SRC_MASK 0x00000007L +#define CG_THERMAL_CTRL__DPM_EVENT_SRC__SHIFT 0x00000000 +#define CG_THERMAL_CTRL__DIG_THERM_DPM_MASK 0x003FC000 +#define CG_THERMAL_CTRL__DIG_THERM_DPM__SHIFT 0x0000000E +#define CG_THERMAL_STATUS__FDO_PWM_DUTY_MASK 0x0001FE00L +#define CG_THERMAL_STATUS__FDO_PWM_DUTY__SHIFT 0x00000009 +#define CG_THERMAL_INT__DIG_THERM_INTH_MASK 0x0000FF00L +#define CG_THERMAL_INT__DIG_THERM_INTH__SHIFT 0x00000008 +#define CG_THERMAL_INT__DIG_THERM_INTL_MASK 0x00FF0000L +#define CG_THERMAL_INT__DIG_THERM_INTL__SHIFT 0x00000010 +#define CG_THERMAL_INT__THERM_INT_MASK_HIGH_MASK 0x01000000L +#define CG_THERMAL_INT__THERM_INT_MASK_HIGH__SHIFT 0x00000018 +#define CG_THERMAL_INT__THERM_INT_MASK_LOW_MASK 0x02000000 +#define CG_THERMAL_INT__THERM_INT_MASK_LOW__SHIFT 0x00000019 + +#define CG_MULT_THERMAL_CTRL__TEMP_SEL_MASK 0x0FF00000L +#define CG_MULT_THERMAL_CTRL__TEMP_SEL__SHIFT 0x00000014 +#define CG_MULT_THERMAL_STATUS__ASIC_MAX_TEMP_MASK 0x000001FFL +#define CG_MULT_THERMAL_STATUS__ASIC_MAX_TEMP__SHIFT 0x00000000 +#define CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK 0x0003fe00L +#define CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT 0x00000009 + +#define CG_FDO_CTRL0__FDO_STATIC_DUTY_MASK 0x000000FFL +#define CG_FDO_CTRL0__FDO_STATIC_DUTY__SHIFT 0x00000000 +#define CG_FDO_CTRL1__FMAX_DUTY100_MASK 0x000000FFL +#define CG_FDO_CTRL1__FMAX_DUTY100__SHIFT 0x00000000 +#define CG_FDO_CTRL2__TMIN_MASK 0x000000FFL +#define CG_FDO_CTRL2__TMIN__SHIFT 0x00000000 +#define CG_FDO_CTRL2__FDO_PWM_MODE_MASK 0x00003800L +#define CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT 
0x0000000B +#define CG_FDO_CTRL2__TACH_PWM_RESP_RATE_MASK 0xFE000000L +#define CG_FDO_CTRL2__TACH_PWM_RESP_RATE__SHIFT 0x00000019 + +#define CG_TACH_CTRL__EDGE_PER_REV_MASK 0x00000007L +#define CG_TACH_CTRL__EDGE_PER_REV__SHIFT 0x00000000 +#define CG_TACH_CTRL__TARGET_PERIOD_MASK 0xFFFFFFF8L +#define CG_TACH_CTRL__TARGET_PERIOD__SHIFT 0x00000003 +#define CG_TACH_STATUS__TACH_PERIOD_MASK 0xFFFFFFFFL +#define CG_TACH_STATUS__TACH_PERIOD__SHIFT 0x00000000 + +#define GENERAL_PWRMGT__GLOBAL_PWRMGT_EN_MASK 0x00000001L +#define GENERAL_PWRMGT__GLOBAL_PWRMGT_EN__SHIFT 0x00000000 +#define GENERAL_PWRMGT__STATIC_PM_EN_MASK 0x00000002L +#define GENERAL_PWRMGT__STATIC_PM_EN__SHIFT 0x00000001 +#define GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK 0x00000004L +#define GENERAL_PWRMGT__THERMAL_PROTECTION_DIS__SHIFT 0x00000002 +#define GENERAL_PWRMGT__THERMAL_PROTECTION_TYPE_MASK 0x00000008L +#define GENERAL_PWRMGT__THERMAL_PROTECTION_TYPE__SHIFT 0x00000003 +#define GENERAL_PWRMGT__SW_SMIO_INDEX_MASK 0x00000040L +#define GENERAL_PWRMGT__SW_SMIO_INDEX__SHIFT 0x00000006 +#define GENERAL_PWRMGT__VOLT_PWRMGT_EN_MASK 0x00000400L +#define GENERAL_PWRMGT__VOLT_PWRMGT_EN__SHIFT 0x0000000A +#define GENERAL_PWRMGT__DYN_SPREAD_SPECTRUM_EN_MASK 0x00800000L +#define GENERAL_PWRMGT__DYN_SPREAD_SPECTRUM_EN__SHIFT 0x00000017 + #define GPIOPAD_A__GPIO_A_MASK 0x7fffffffL #define GPIOPAD_A__GPIO_A__SHIFT 0x00000000 #define GPIOPAD_EN__GPIO_EN_MASK 0x7fffffffL @@ -195,6 +327,7 @@ #define GPIOPAD_SW_INT_STAT__SW_INT_STAT__SHIFT 0x00000000 #define GPIOPAD_Y__GPIO_Y_MASK 0x7fffffffL #define GPIOPAD_Y__GPIO_Y__SHIFT 0x00000000 + #define LCAC_MC0_CNTL__MC0_ENABLE_MASK 0x00000001L #define LCAC_MC0_CNTL__MC0_ENABLE__SHIFT 0x00000000 #define LCAC_MC0_CNTL__MC0_THRESHOLD_MASK 0x0001fffeL @@ -243,6 +376,37 @@ #define LCAC_MC5_OVR_SEL__MC5_OVR_SEL__SHIFT 0x00000000 #define LCAC_MC5_OVR_VAL__MC5_OVR_VAL_MASK 0xffffffffL #define LCAC_MC5_OVR_VAL__MC5_OVR_VAL__SHIFT 0x00000000 + +#define MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK 0x0000FF00L +#define MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT 0x00000008 + +#define SCLK_PWRMGT_CNTL__SCLK_PWRMGT_OFF_MASK 0x00000001L +#define SCLK_PWRMGT_CNTL__SCLK_PWRMGT_OFF__SHIFT 0x00000000 +#define SCLK_PWRMGT_CNTL__SCLK_LOW_D1_MASK 0x00000002L +#define SCLK_PWRMGT_CNTL__SCLK_LOW_D1__SHIFT 0x00000001 +#define SCLK_PWRMGT_CNTL__FIR_RESET_MASK 0x00000010L +#define SCLK_PWRMGT_CNTL__FIR_RESET__SHIFT 0x00000004 +#define SCLK_PWRMGT_CNTL__FIR_FORCE_TREND_SEL_MASK 0x00000020L +#define SCLK_PWRMGT_CNTL__FIR_FORCE_TREND_SEL__SHIFT 0x00000005 +#define SCLK_PWRMGT_CNTL__FIR_TREND_MODE_MASK 0x00000040L +#define SCLK_PWRMGT_CNTL__FIR_TREND_MODE__SHIFT 0x00000006 +#define SCLK_PWRMGT_CNTL__DYN_GFX_CLK_OFF_EN_MASK 0x00000080L +#define SCLK_PWRMGT_CNTL__DYN_GFX_CLK_OFF_EN__SHIFT 0x00000007 +#define SCLK_PWRMGT_CNTL__GFX_CLK_FORCE_ON_MASK 0x00000100L +#define SCLK_PWRMGT_CNTL__GFX_CLK_FORCE_ON__SHIFT 0x00000008 +#define SCLK_PWRMGT_CNTL__GFX_CLK_REQUEST_OFF_MASK 0x00000200L +#define SCLK_PWRMGT_CNTL__GFX_CLK_REQUEST_OFF__SHIFT 0x00000009 +#define SCLK_PWRMGT_CNTL__GFX_CLK_FORCE_OFF_MASK 0x00000400L +#define SCLK_PWRMGT_CNTL__GFX_CLK_FORCE_OFF__SHIFT 0x0000000A +#define SCLK_PWRMGT_CNTL__GFX_CLK_OFF_ACPI_D1_MASK 0x00000800L +#define SCLK_PWRMGT_CNTL__GFX_CLK_OFF_ACPI_D1__SHIFT 0x0000000B +#define SCLK_PWRMGT_CNTL__GFX_CLK_OFF_ACPI_D2_MASK 0x00001000L +#define SCLK_PWRMGT_CNTL__GFX_CLK_OFF_ACPI_D2__SHIFT 0x0000000C +#define SCLK_PWRMGT_CNTL__GFX_CLK_OFF_ACPI_D3_MASK 0x00002000L +#define 
SCLK_PWRMGT_CNTL__GFX_CLK_OFF_ACPI_D3__SHIFT 0x0000000D +#define SCLK_PWRMGT_CNTL__DYN_LIGHT_SLEEP_EN_MASK 0x00004000L +#define SCLK_PWRMGT_CNTL__DYN_LIGHT_SLEEP_EN__SHIFT 0x0000000E + #define SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK 0x00000001L #define SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0__SHIFT 0x00000000 #define SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_1_MASK 0x00000100L @@ -285,6 +449,7 @@ #define SMC_RESP_1__SMC_RESP__SHIFT 0x00000000 #define SMC_RESP_2__SMC_RESP_MASK 0xffffffffL #define SMC_RESP_2__SMC_RESP__SHIFT 0x00000000 + #define SPLL_CNTL_MODE__SPLL_CTLREQ_DLY_CNT_MASK 0x000ff000L #define SPLL_CNTL_MODE__SPLL_CTLREQ_DLY_CNT__SHIFT 0x0000000c #define SPLL_CNTL_MODE__SPLL_ENSAT_MASK 0x00000010L @@ -293,6 +458,8 @@ #define SPLL_CNTL_MODE__SPLL_FASTEN__SHIFT 0x00000003 #define SPLL_CNTL_MODE__SPLL_LEGACY_PDIV_MASK 0x00000002L #define SPLL_CNTL_MODE__SPLL_LEGACY_PDIV__SHIFT 0x00000001 +#define SPLL_CNTL_MODE__SPLL_REFCLK_SEL_MASK 0x0C000000L +#define SPLL_CNTL_MODE__SPLL_REFCLK_SEL__SHIFT 0x0000001A #define SPLL_CNTL_MODE__SPLL_RESET_EN_MASK 0x10000000L #define SPLL_CNTL_MODE__SPLL_RESET_EN__SHIFT 0x0000001c #define SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK 0x00000001L @@ -303,10 +470,25 @@ #define SPLL_CNTL_MODE__SPLL_TEST__SHIFT 0x00000002 #define SPLL_CNTL_MODE__SPLL_VCO_MODE_MASK 0x60000000L #define SPLL_CNTL_MODE__SPLL_VCO_MODE__SHIFT 0x0000001d + #define TARGET_AND_CURRENT_PROFILE_INDEX_1__CURR_PCIE_INDEX_MASK 0x0f000000L #define TARGET_AND_CURRENT_PROFILE_INDEX_1__CURR_PCIE_INDEX__SHIFT 0x00000018 #define TARGET_AND_CURRENT_PROFILE_INDEX_1__TARG_PCIE_INDEX_MASK 0xf0000000L #define TARGET_AND_CURRENT_PROFILE_INDEX_1__TARG_PCIE_INDEX__SHIFT 0x0000001c + +#define TARGET_AND_CURRENT_PROFILE_INDEX__CURRENT_STATE_INDEX_MASK 0x000000F0L +#define TARGET_AND_CURRENT_PROFILE_INDEX__CURRENT_STATE_INDEX__SHIFT 0x00000004 + +#define THM_CLK_CNTL__CMON_CLK_SEL_MASK 0x000000FFL +#define THM_CLK_CNTL__CMON_CLK_SEL__SHIFT 0x00000000 +#define THM_CLK_CNTL__TMON_CLK_SEL_MASK 0x0000FF00L +#define THM_CLK_CNTL__TMON_CLK_SEL__SHIFT 0x00000008 + +#define MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL_MASK 0x000000FFL +#define MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL__SHIFT 0x00000000 +#define MISC_CLK_CNTL__ZCLK_SEL_MASK 0x0000FF00L +#define MISC_CLK_CNTL__ZCLK_SEL__SHIFT 0x00000008 + #define THM_TMON0_DEBUG__DEBUG_RDI_MASK 0x0000001fL #define THM_TMON0_DEBUG__DEBUG_RDI__SHIFT 0x00000000 #define THM_TMON0_DEBUG__DEBUG_Z_MASK 0x0000ffe0L diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h index b78360a71bc9..52bac19fb404 100644 --- a/drivers/gpu/drm/amd/include/atombios.h +++ b/drivers/gpu/drm/amd/include/atombios.h @@ -4308,7 +4308,7 @@ typedef struct _ATOM_DPCD_INFO // note2: From RV770, the memory is more than 32bit addressable, so we will change // ucTableFormatRevision=1,ucTableContentRevision=4, the strcuture remains // exactly same as 1.1 and 1.2 (1.3 is never in use), but ulStartAddrUsedByFirmware -// (in offset to start of memory address) is KB aligned instead of byte aligend. +// (in offset to start of memory address) is KB aligned instead of byte aligned. 
// Note3: /* If we change usReserved to "usFBUsedbyDrvInKB", then to VBIOS this usFBUsedbyDrvInKB is a predefined, unchanged constant across VGA or non VGA adapter, diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 21dc956b5f35..0f7542d7074b 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -128,6 +128,7 @@ enum amd_pp_sensors { AMDGPU_PP_SENSOR_CPU_CLK, AMDGPU_PP_SENSOR_VDDNB, AMDGPU_PP_SENSOR_VDDGFX, + AMDGPU_PP_SENSOR_VDDBOARD, AMDGPU_PP_SENSOR_UVD_VCLK, AMDGPU_PP_SENSOR_UVD_DCLK, AMDGPU_PP_SENSOR_VCE_ECCLK, diff --git a/drivers/gpu/drm/amd/include/v11_structs.h b/drivers/gpu/drm/amd/include/v11_structs.h index f8008270f813..3728389fc3be 100644 --- a/drivers/gpu/drm/amd/include/v11_structs.h +++ b/drivers/gpu/drm/amd/include/v11_structs.h @@ -535,8 +535,8 @@ struct v11_gfx_mqd { uint32_t reserved_507; // offset: 507 (0x1FB) uint32_t reserved_508; // offset: 508 (0x1FC) uint32_t reserved_509; // offset: 509 (0x1FD) - uint32_t reserved_510; // offset: 510 (0x1FE) - uint32_t reserved_511; // offset: 511 (0x1FF) + uint32_t fence_address_lo; // offset: 510 (0x1FE) + uint32_t fence_address_hi; // offset: 511 (0x1FF) }; struct v11_sdma_mqd { @@ -1118,8 +1118,8 @@ struct v11_compute_mqd { uint32_t reserved_443; // offset: 443 (0x1BB) uint32_t reserved_444; // offset: 444 (0x1BC) uint32_t reserved_445; // offset: 445 (0x1BD) - uint32_t reserved_446; // offset: 446 (0x1BE) - uint32_t reserved_447; // offset: 447 (0x1BF) + uint32_t fence_address_lo; // offset: 446 (0x1BE) + uint32_t fence_address_hi; // offset: 447 (0x1BF) uint32_t gws_0_val; // offset: 448 (0x1C0) uint32_t gws_1_val; // offset: 449 (0x1C1) uint32_t gws_2_val; // offset: 450 (0x1C2) diff --git a/drivers/gpu/drm/amd/include/v12_structs.h b/drivers/gpu/drm/amd/include/v12_structs.h index 5eabab611b02..03a35f8a65b0 100644 --- a/drivers/gpu/drm/amd/include/v12_structs.h +++ b/drivers/gpu/drm/amd/include/v12_structs.h @@ -535,8 +535,8 @@ struct v12_gfx_mqd { uint32_t reserved_507; // offset: 507 (0x1FB) uint32_t reserved_508; // offset: 508 (0x1FC) uint32_t reserved_509; // offset: 509 (0x1FD) - uint32_t reserved_510; // offset: 510 (0x1FE) - uint32_t reserved_511; // offset: 511 (0x1FF) + uint32_t fence_address_lo; // offset: 510 (0x1FE) + uint32_t fence_address_hi; // offset: 511 (0x1FF) }; struct v12_sdma_mqd { @@ -1118,8 +1118,8 @@ struct v12_compute_mqd { uint32_t reserved_443; // offset: 443 (0x1BB) uint32_t reserved_444; // offset: 444 (0x1BC) uint32_t reserved_445; // offset: 445 (0x1BD) - uint32_t reserved_446; // offset: 446 (0x1BE) - uint32_t reserved_447; // offset: 447 (0x1BF) + uint32_t fence_address_lo; // offset: 446 (0x1BE) + uint32_t fence_address_hi; // offset: 447 (0x1BF) uint32_t gws_0_val; // offset: 448 (0x1C0) uint32_t gws_1_val; // offset: 449 (0x1C1) uint32_t gws_2_val; // offset: 450 (0x1C2) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 3533d43ed1e7..2148c8db5a59 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -329,6 +329,34 @@ int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev) return ret; } +bool amdgpu_dpm_is_link_reset_supported(struct amdgpu_device *adev) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + bool support_link_reset = false; + + if (is_support_sw_smu(adev)) { + mutex_lock(&adev->pm.mutex); + support_link_reset = smu_link_reset_is_support(smu); + 
mutex_unlock(&adev->pm.mutex); + } + + return support_link_reset; +} + +int amdgpu_dpm_link_reset(struct amdgpu_device *adev) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret = -EOPNOTSUPP; + + if (is_support_sw_smu(adev)) { + mutex_lock(&adev->pm.mutex); + ret = smu_link_reset(smu); + mutex_unlock(&adev->pm.mutex); + } + + return ret; +} + int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, enum PP_SMC_POWER_PROFILE type, bool en) @@ -780,6 +808,21 @@ int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask) return ret; } +int amdgpu_dpm_reset_vcn(struct amdgpu_device *adev, uint32_t inst_mask) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret; + + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + + mutex_lock(&adev->pm.mutex); + ret = smu_reset_vcn(smu, inst_mask); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev, enum pp_clock_type type, uint32_t *min, diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 922def51685b..5537dcf23b5c 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2944,6 +2944,23 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, return sysfs_emit(buf, "%d\n", vddgfx); } +static ssize_t amdgpu_hwmon_show_vddboard(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + u32 vddboard; + int r; + + /* get the voltage */ + r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDBOARD, + (void *)&vddboard); + if (r) + return r; + + return sysfs_emit(buf, "%d\n", vddboard); +} + static ssize_t amdgpu_hwmon_show_vddgfx_label(struct device *dev, struct device_attribute *attr, char *buf) @@ -2951,6 +2968,12 @@ static ssize_t amdgpu_hwmon_show_vddgfx_label(struct device *dev, return sysfs_emit(buf, "vddgfx\n"); } +static ssize_t amdgpu_hwmon_show_vddboard_label(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "vddboard\n"); +} static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, struct device_attribute *attr, char *buf) @@ -3294,6 +3317,8 @@ static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, amdgpu_hwmon_show_vddgfx, NULL, 0) static SENSOR_DEVICE_ATTR(in0_label, S_IRUGO, amdgpu_hwmon_show_vddgfx_label, NULL, 0); static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, amdgpu_hwmon_show_vddnb, NULL, 0); static SENSOR_DEVICE_ATTR(in1_label, S_IRUGO, amdgpu_hwmon_show_vddnb_label, NULL, 0); +static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, amdgpu_hwmon_show_vddboard, NULL, 0); +static SENSOR_DEVICE_ATTR(in2_label, S_IRUGO, amdgpu_hwmon_show_vddboard_label, NULL, 0); static SENSOR_DEVICE_ATTR(power1_average, S_IRUGO, amdgpu_hwmon_show_power_avg, NULL, 0); static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO, amdgpu_hwmon_show_power_input, NULL, 0); static SENSOR_DEVICE_ATTR(power1_cap_max, S_IRUGO, amdgpu_hwmon_show_power_cap_max, NULL, 0); @@ -3341,6 +3366,8 @@ static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_in0_label.dev_attr.attr, &sensor_dev_attr_in1_input.dev_attr.attr, &sensor_dev_attr_in1_label.dev_attr.attr, + &sensor_dev_attr_in2_input.dev_attr.attr, + &sensor_dev_attr_in2_label.dev_attr.attr, &sensor_dev_attr_power1_average.dev_attr.attr, &sensor_dev_attr_power1_input.dev_attr.attr, &sensor_dev_attr_power1_cap_max.dev_attr.attr, @@ -3492,6 +3519,13 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == 
&sensor_dev_attr_in1_label.dev_attr.attr)) return 0; + /* only a few boards support vddboard */ + if ((attr == &sensor_dev_attr_in2_input.dev_attr.attr || + attr == &sensor_dev_attr_in2_label.dev_attr.attr) && + amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDBOARD, + (void *)&tmp) == -EOPNOTSUPP) + return 0; + /* no mclk on APUs other than gc 9,4,3*/ if (((adev->flags & AMD_IS_APU) && (gc_ver != IP_VERSION(9, 4, 3))) && (attr == &sensor_dev_attr_freq2_input.dev_attr.attr || diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 4c0f7ad14816..2c3c97587dd5 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -416,11 +416,13 @@ int amdgpu_dpm_pause_power_profile(struct amdgpu_device *adev, int amdgpu_dpm_baco_reset(struct amdgpu_device *adev); int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev); +int amdgpu_dpm_link_reset(struct amdgpu_device *adev); int amdgpu_dpm_enable_gfx_features(struct amdgpu_device *adev); int amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev); bool amdgpu_dpm_is_mode1_reset_supported(struct amdgpu_device *adev); +bool amdgpu_dpm_is_link_reset_supported(struct amdgpu_device *adev); int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev); int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, @@ -607,5 +609,6 @@ ssize_t amdgpu_dpm_get_pm_policy_info(struct amdgpu_device *adev, enum pp_pm_policy p_type, char *buf); int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask); bool amdgpu_dpm_reset_sdma_is_supported(struct amdgpu_device *adev); +int amdgpu_dpm_reset_vcn(struct amdgpu_device *adev, uint32_t inst_mask); #endif diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index 1c25f3023e93..4c0e976004ba 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -30,16 +30,32 @@ #include "amdgpu_atombios.h" #include "amdgpu_dpm_internal.h" #include "amd_pcie.h" -#include "sid.h" +#include "atom.h" +#include "gfx_v6_0.h" #include "r600_dpm.h" +#include "sid.h" #include "si_dpm.h" -#include "atom.h" #include "../include/pptable.h" #include <linux/math64.h> #include <linux/seq_file.h> #include <linux/firmware.h> #include <legacy_dpm.h> +#include "bif/bif_3_0_d.h" +#include "bif/bif_3_0_sh_mask.h" + +#include "dce/dce_6_0_d.h" +#include "dce/dce_6_0_sh_mask.h" + +#include "gca/gfx_6_0_d.h" +#include "gca/gfx_6_0_sh_mask.h" + +#include "gmc/gmc_6_0_d.h" +#include "gmc/gmc_6_0_sh_mask.h" + +#include "smu/smu_6_0_d.h" +#include "smu/smu_6_0_sh_mask.h" + #define MC_CG_ARB_FREQ_F0 0x0a #define MC_CG_ARB_FREQ_F1 0x0b #define MC_CG_ARB_FREQ_F2 0x0c @@ -2193,7 +2209,7 @@ static u32 si_calculate_cac_wintime(struct amdgpu_device *adev) if (xclk == 0) return 0; - cac_window = RREG32(CG_CAC_CTRL) & CAC_WINDOW_MASK; + cac_window = RREG32(mmCG_CAC_CTRL) & CG_CAC_CTRL__CAC_WINDOW_MASK; cac_window_size = ((cac_window & 0xFFFF0000) >> 16) * (cac_window & 0x0000FFFF); wintime = (cac_window_size * 100) / xclk; @@ -2489,19 +2505,19 @@ static int si_populate_sq_ramping_values(struct amdgpu_device *adev, if (adev->pm.dpm.sq_ramping_threshold == 0) return -EINVAL; - if (SISLANDS_DPM2_SQ_RAMP_MAX_POWER > (MAX_POWER_MASK >> MAX_POWER_SHIFT)) + if (SISLANDS_DPM2_SQ_RAMP_MAX_POWER > (SQ_POWER_THROTTLE__MAX_POWER_MASK >> SQ_POWER_THROTTLE__MAX_POWER__SHIFT)) enable_sq_ramping = false; - if (SISLANDS_DPM2_SQ_RAMP_MIN_POWER > (MIN_POWER_MASK >> MIN_POWER_SHIFT)) + if
(SISLANDS_DPM2_SQ_RAMP_MIN_POWER > (SQ_POWER_THROTTLE__MIN_POWER_MASK >> SQ_POWER_THROTTLE__MIN_POWER__SHIFT)) enable_sq_ramping = false; - if (SISLANDS_DPM2_SQ_RAMP_MAX_POWER_DELTA > (MAX_POWER_DELTA_MASK >> MAX_POWER_DELTA_SHIFT)) + if (SISLANDS_DPM2_SQ_RAMP_MAX_POWER_DELTA > (SQ_POWER_THROTTLE2__MAX_POWER_DELTA_MASK >> SQ_POWER_THROTTLE2__MAX_POWER_DELTA__SHIFT)) enable_sq_ramping = false; - if (SISLANDS_DPM2_SQ_RAMP_STI_SIZE > (STI_SIZE_MASK >> STI_SIZE_SHIFT)) + if (SISLANDS_DPM2_SQ_RAMP_STI_SIZE > (SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE_MASK >> SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE__SHIFT)) enable_sq_ramping = false; - if (SISLANDS_DPM2_SQ_RAMP_LTI_RATIO > (LTI_RATIO_MASK >> LTI_RATIO_SHIFT)) + if (SISLANDS_DPM2_SQ_RAMP_LTI_RATIO > (SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO_MASK >> SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO__SHIFT)) enable_sq_ramping = false; for (i = 0; i < state->performance_level_count; i++) { @@ -2510,14 +2526,17 @@ static int si_populate_sq_ramping_values(struct amdgpu_device *adev, if ((state->performance_levels[i].sclk >= adev->pm.dpm.sq_ramping_threshold) && enable_sq_ramping) { - sq_power_throttle |= MAX_POWER(SISLANDS_DPM2_SQ_RAMP_MAX_POWER); - sq_power_throttle |= MIN_POWER(SISLANDS_DPM2_SQ_RAMP_MIN_POWER); - sq_power_throttle2 |= MAX_POWER_DELTA(SISLANDS_DPM2_SQ_RAMP_MAX_POWER_DELTA); - sq_power_throttle2 |= STI_SIZE(SISLANDS_DPM2_SQ_RAMP_STI_SIZE); - sq_power_throttle2 |= LTI_RATIO(SISLANDS_DPM2_SQ_RAMP_LTI_RATIO); + sq_power_throttle |= SISLANDS_DPM2_SQ_RAMP_MAX_POWER << SQ_POWER_THROTTLE__MAX_POWER__SHIFT; + sq_power_throttle |= SISLANDS_DPM2_SQ_RAMP_MIN_POWER << SQ_POWER_THROTTLE__MIN_POWER__SHIFT; + sq_power_throttle2 |= SISLANDS_DPM2_SQ_RAMP_MAX_POWER_DELTA << SQ_POWER_THROTTLE2__MAX_POWER_DELTA__SHIFT; + sq_power_throttle2 |= SISLANDS_DPM2_SQ_RAMP_STI_SIZE << SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE__SHIFT; + sq_power_throttle2 |= SISLANDS_DPM2_SQ_RAMP_LTI_RATIO << SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO__SHIFT; } else { - sq_power_throttle |= MAX_POWER_MASK | MIN_POWER_MASK; - sq_power_throttle2 |= MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK; + sq_power_throttle |= SQ_POWER_THROTTLE__MAX_POWER_MASK | + SQ_POWER_THROTTLE__MIN_POWER_MASK; + sq_power_throttle2 |= SQ_POWER_THROTTLE2__MAX_POWER_DELTA_MASK | + SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE_MASK | + SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO_MASK; } smc_state->levels[i].SQPowerThrottle = cpu_to_be32(sq_power_throttle); @@ -2761,9 +2780,9 @@ static int si_initialize_smc_cac_tables(struct amdgpu_device *adev) if (!cac_tables) return -ENOMEM; - reg = RREG32(CG_CAC_CTRL) & ~CAC_WINDOW_MASK; - reg |= CAC_WINDOW(si_pi->powertune_data->cac_window); - WREG32(CG_CAC_CTRL, reg); + reg = RREG32(mmCG_CAC_CTRL) & ~CG_CAC_CTRL__CAC_WINDOW_MASK; + reg |= (si_pi->powertune_data->cac_window << CG_CAC_CTRL__CAC_WINDOW__SHIFT); + WREG32(mmCG_CAC_CTRL, reg); si_pi->dyn_powertune_data.cac_leakage = adev->pm.dpm.cac_leakage; si_pi->dyn_powertune_data.dc_pwr_value = @@ -2962,10 +2981,10 @@ static int si_init_smc_spll_table(struct amdgpu_device *adev) ret = si_calculate_sclk_params(adev, sclk, &sclk_params); if (ret) break; - p_div = (sclk_params.vCG_SPLL_FUNC_CNTL & SPLL_PDIV_A_MASK) >> SPLL_PDIV_A_SHIFT; - fb_div = (sclk_params.vCG_SPLL_FUNC_CNTL_3 & SPLL_FB_DIV_MASK) >> SPLL_FB_DIV_SHIFT; - clk_s = (sclk_params.vCG_SPLL_SPREAD_SPECTRUM & CLK_S_MASK) >> CLK_S_SHIFT; - clk_v = (sclk_params.vCG_SPLL_SPREAD_SPECTRUM_2 & CLK_V_MASK) >> CLK_V_SHIFT; + p_div = 
(sclk_params.vCG_SPLL_FUNC_CNTL & CG_SPLL_FUNC_CNTL__SPLL_PDIV_A_MASK) >> CG_SPLL_FUNC_CNTL__SPLL_PDIV_A__SHIFT; + fb_div = (sclk_params.vCG_SPLL_FUNC_CNTL_3 & CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV_MASK) >> CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV__SHIFT; + clk_s = (sclk_params.vCG_SPLL_SPREAD_SPECTRUM & CG_SPLL_SPREAD_SPECTRUM__CLK_S_MASK) >> CG_SPLL_SPREAD_SPECTRUM__CLK_S__SHIFT; + clk_v = (sclk_params.vCG_SPLL_SPREAD_SPECTRUM_2 & CG_SPLL_SPREAD_SPECTRUM_2__CLK_V_MASK) >> CG_SPLL_SPREAD_SPECTRUM_2__CLK_V__SHIFT; fb_div &= ~0x00001FFF; fb_div >>= 1; @@ -3669,10 +3688,10 @@ static bool si_is_special_1gb_platform(struct amdgpu_device *adev) WREG32(MC_SEQ_IO_DEBUG_INDEX, 0xb); width = ((RREG32(MC_SEQ_IO_DEBUG_DATA) >> 1) & 1) ? 16 : 32; - tmp = RREG32(MC_ARB_RAMCFG); - row = ((tmp & NOOFROWS_MASK) >> NOOFROWS_SHIFT) + 10; - column = ((tmp & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT) + 8; - bank = ((tmp & NOOFBANK_MASK) >> NOOFBANK_SHIFT) + 2; + tmp = RREG32(mmMC_ARB_RAMCFG); + row = ((tmp & MC_ARB_RAMCFG__NOOFROWS_MASK) >> MC_ARB_RAMCFG__NOOFROWS__SHIFT) + 10; + column = ((tmp & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT) + 8; + bank = ((tmp & MC_ARB_RAMCFG__NOOFBANK_MASK) >> MC_ARB_RAMCFG__NOOFBANK__SHIFT) + 2; density = (1 << (row + column - 20 + bank)) * width; @@ -3756,11 +3775,11 @@ static void si_set_dpm_event_sources(struct amdgpu_device *adev, u32 sources) } if (want_thermal_protection) { - WREG32_P(CG_THERMAL_CTRL, DPM_EVENT_SRC(dpm_event_src), ~DPM_EVENT_SRC_MASK); + WREG32_P(mmCG_THERMAL_CTRL, dpm_event_src << CG_THERMAL_CTRL__DPM_EVENT_SRC__SHIFT, ~CG_THERMAL_CTRL__DPM_EVENT_SRC_MASK); if (pi->thermal_protection) - WREG32_P(GENERAL_PWRMGT, 0, ~THERMAL_PROTECTION_DIS); + WREG32_P(mmGENERAL_PWRMGT, 0, ~GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK); } else { - WREG32_P(GENERAL_PWRMGT, THERMAL_PROTECTION_DIS, ~THERMAL_PROTECTION_DIS); + WREG32_P(mmGENERAL_PWRMGT, GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK, ~GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK); } } @@ -3785,20 +3804,20 @@ static void si_enable_auto_throttle_source(struct amdgpu_device *adev, static void si_start_dpm(struct amdgpu_device *adev) { - WREG32_P(GENERAL_PWRMGT, GLOBAL_PWRMGT_EN, ~GLOBAL_PWRMGT_EN); + WREG32_P(mmGENERAL_PWRMGT, GENERAL_PWRMGT__GLOBAL_PWRMGT_EN_MASK, ~GENERAL_PWRMGT__GLOBAL_PWRMGT_EN_MASK); } static void si_stop_dpm(struct amdgpu_device *adev) { - WREG32_P(GENERAL_PWRMGT, 0, ~GLOBAL_PWRMGT_EN); + WREG32_P(mmGENERAL_PWRMGT, 0, ~GENERAL_PWRMGT__GLOBAL_PWRMGT_EN_MASK); } static void si_enable_sclk_control(struct amdgpu_device *adev, bool enable) { if (enable) - WREG32_P(SCLK_PWRMGT_CNTL, 0, ~SCLK_PWRMGT_OFF); + WREG32_P(mmSCLK_PWRMGT_CNTL, 0, ~SCLK_PWRMGT_CNTL__SCLK_PWRMGT_OFF_MASK); else - WREG32_P(SCLK_PWRMGT_CNTL, SCLK_PWRMGT_OFF, ~SCLK_PWRMGT_OFF); + WREG32_P(mmSCLK_PWRMGT_CNTL, SCLK_PWRMGT_CNTL__SCLK_PWRMGT_OFF_MASK, ~SCLK_PWRMGT_CNTL__SCLK_PWRMGT_OFF_MASK); } @@ -3838,7 +3857,7 @@ static int si_notify_hw_of_powersource(struct amdgpu_device *adev, bool ac_power static PPSMC_Result si_send_msg_to_smc_with_parameter(struct amdgpu_device *adev, PPSMC_Msg msg, u32 parameter) { - WREG32(SMC_SCRATCH0, parameter); + WREG32(mmSMC_SCRATCH0, parameter); return amdgpu_si_send_msg_to_smc(adev, msg); } @@ -4023,12 +4042,12 @@ static void si_read_clock_registers(struct amdgpu_device *adev) { struct si_power_info *si_pi = si_get_pi(adev); - si_pi->clock_registers.cg_spll_func_cntl = RREG32(CG_SPLL_FUNC_CNTL); - si_pi->clock_registers.cg_spll_func_cntl_2 = RREG32(CG_SPLL_FUNC_CNTL_2); - 
si_pi->clock_registers.cg_spll_func_cntl_3 = RREG32(CG_SPLL_FUNC_CNTL_3); - si_pi->clock_registers.cg_spll_func_cntl_4 = RREG32(CG_SPLL_FUNC_CNTL_4); - si_pi->clock_registers.cg_spll_spread_spectrum = RREG32(CG_SPLL_SPREAD_SPECTRUM); - si_pi->clock_registers.cg_spll_spread_spectrum_2 = RREG32(CG_SPLL_SPREAD_SPECTRUM_2); + si_pi->clock_registers.cg_spll_func_cntl = RREG32(mmCG_SPLL_FUNC_CNTL); + si_pi->clock_registers.cg_spll_func_cntl_2 = RREG32(mmCG_SPLL_FUNC_CNTL_2); + si_pi->clock_registers.cg_spll_func_cntl_3 = RREG32(mmCG_SPLL_FUNC_CNTL_3); + si_pi->clock_registers.cg_spll_func_cntl_4 = RREG32(mmCG_SPLL_FUNC_CNTL_4); + si_pi->clock_registers.cg_spll_spread_spectrum = RREG32(mmCG_SPLL_SPREAD_SPECTRUM); + si_pi->clock_registers.cg_spll_spread_spectrum_2 = RREG32(mmCG_SPLL_SPREAD_SPECTRUM_2); si_pi->clock_registers.dll_cntl = RREG32(DLL_CNTL); si_pi->clock_registers.mclk_pwrmgt_cntl = RREG32(MCLK_PWRMGT_CNTL); si_pi->clock_registers.mpll_ad_func_cntl = RREG32(MPLL_AD_FUNC_CNTL); @@ -4044,14 +4063,14 @@ static void si_enable_thermal_protection(struct amdgpu_device *adev, bool enable) { if (enable) - WREG32_P(GENERAL_PWRMGT, 0, ~THERMAL_PROTECTION_DIS); + WREG32_P(mmGENERAL_PWRMGT, 0, ~GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK); else - WREG32_P(GENERAL_PWRMGT, THERMAL_PROTECTION_DIS, ~THERMAL_PROTECTION_DIS); + WREG32_P(mmGENERAL_PWRMGT, GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK, ~GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK); } static void si_enable_acpi_power_management(struct amdgpu_device *adev) { - WREG32_P(GENERAL_PWRMGT, STATIC_PM_EN, ~STATIC_PM_EN); + WREG32_P(mmGENERAL_PWRMGT, GENERAL_PWRMGT__STATIC_PM_EN_MASK, ~GENERAL_PWRMGT__STATIC_PM_EN_MASK); } #if 0 @@ -4132,9 +4151,9 @@ static void si_program_ds_registers(struct amdgpu_device *adev) tmp = 0x1; if (eg_pi->sclk_deep_sleep) { - WREG32_P(MISC_CLK_CNTL, DEEP_SLEEP_CLK_SEL(tmp), ~DEEP_SLEEP_CLK_SEL_MASK); - WREG32_P(CG_SPLL_AUTOSCALE_CNTL, AUTOSCALE_ON_SS_CLEAR, - ~AUTOSCALE_ON_SS_CLEAR); + WREG32_P(mmMISC_CLK_CNTL, (tmp << MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL__SHIFT), ~MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL_MASK); + WREG32_P(mmCG_SPLL_AUTOSCALE_CNTL, CG_SPLL_AUTOSCALE_CNTL__AUTOSCALE_ON_SS_CLEAR_MASK, + ~CG_SPLL_AUTOSCALE_CNTL__AUTOSCALE_ON_SS_CLEAR_MASK); } } @@ -4143,18 +4162,18 @@ static void si_program_display_gap(struct amdgpu_device *adev) u32 tmp, pipe; int i; - tmp = RREG32(CG_DISPLAY_GAP_CNTL) & ~(DISP1_GAP_MASK | DISP2_GAP_MASK); + tmp = RREG32(mmCG_DISPLAY_GAP_CNTL) & ~(CG_DISPLAY_GAP_CNTL__DISP1_GAP_MASK | CG_DISPLAY_GAP_CNTL__DISP2_GAP_MASK); if (adev->pm.dpm.new_active_crtc_count > 0) - tmp |= DISP1_GAP(R600_PM_DISPLAY_GAP_VBLANK_OR_WM); + tmp |= R600_PM_DISPLAY_GAP_VBLANK_OR_WM << CG_DISPLAY_GAP_CNTL__DISP1_GAP__SHIFT; else - tmp |= DISP1_GAP(R600_PM_DISPLAY_GAP_IGNORE); + tmp |= R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP1_GAP__SHIFT; if (adev->pm.dpm.new_active_crtc_count > 1) - tmp |= DISP2_GAP(R600_PM_DISPLAY_GAP_VBLANK_OR_WM); + tmp |= R600_PM_DISPLAY_GAP_VBLANK_OR_WM << CG_DISPLAY_GAP_CNTL__DISP2_GAP__SHIFT; else - tmp |= DISP2_GAP(R600_PM_DISPLAY_GAP_IGNORE); + tmp |= R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP2_GAP__SHIFT; - WREG32(CG_DISPLAY_GAP_CNTL, tmp); + WREG32(mmCG_DISPLAY_GAP_CNTL, tmp); tmp = RREG32(DCCG_DISP_SLOW_SELECT_REG); pipe = (tmp & DCCG_DISP1_SLOW_SELECT_MASK) >> DCCG_DISP1_SLOW_SELECT_SHIFT; @@ -4189,10 +4208,10 @@ static void si_enable_spread_spectrum(struct amdgpu_device *adev, bool enable) if (enable) { if (pi->sclk_ss) - WREG32_P(GENERAL_PWRMGT, 
DYN_SPREAD_SPECTRUM_EN, ~DYN_SPREAD_SPECTRUM_EN); + WREG32_P(mmGENERAL_PWRMGT, GENERAL_PWRMGT__DYN_SPREAD_SPECTRUM_EN_MASK, ~GENERAL_PWRMGT__DYN_SPREAD_SPECTRUM_EN_MASK); } else { - WREG32_P(CG_SPLL_SPREAD_SPECTRUM, 0, ~SSEN); - WREG32_P(GENERAL_PWRMGT, 0, ~DYN_SPREAD_SPECTRUM_EN); + WREG32_P(mmCG_SPLL_SPREAD_SPECTRUM, 0, ~CG_SPLL_SPREAD_SPECTRUM__SSEN_MASK); + WREG32_P(mmGENERAL_PWRMGT, 0, ~GENERAL_PWRMGT__DYN_SPREAD_SPECTRUM_EN_MASK); } } @@ -4214,15 +4233,15 @@ static void si_setup_bsp(struct amdgpu_device *adev) &pi->pbsu); - pi->dsp = BSP(pi->bsp) | BSU(pi->bsu); - pi->psp = BSP(pi->pbsp) | BSU(pi->pbsu); + pi->dsp = (pi->bsp << CG_BSP__BSP__SHIFT) | (pi->bsu << CG_BSP__BSU__SHIFT); + pi->psp = (pi->pbsp << CG_BSP__BSP__SHIFT) | (pi->pbsu << CG_BSP__BSU__SHIFT); - WREG32(CG_BSP, pi->dsp); + WREG32(mmCG_BSP, pi->dsp); } static void si_program_git(struct amdgpu_device *adev) { - WREG32_P(CG_GIT, CG_GICST(R600_GICST_DFLT), ~CG_GICST_MASK); + WREG32_P(mmCG_GIT, R600_GICST_DFLT << CG_GIT__CG_GICST__SHIFT, ~CG_GIT__CG_GICST_MASK); } static void si_program_tp(struct amdgpu_device *adev) @@ -4231,54 +4250,54 @@ static void si_program_tp(struct amdgpu_device *adev) enum r600_td td = R600_TD_DFLT; for (i = 0; i < R600_PM_NUMBER_OF_TC; i++) - WREG32(CG_FFCT_0 + i, (UTC_0(r600_utc[i]) | DTC_0(r600_dtc[i]))); + WREG32(mmCG_FFCT_0 + i, (r600_utc[i] << CG_FFCT_0__UTC_0__SHIFT | r600_dtc[i] << CG_FFCT_0__DTC_0__SHIFT)); if (td == R600_TD_AUTO) - WREG32_P(SCLK_PWRMGT_CNTL, 0, ~FIR_FORCE_TREND_SEL); + WREG32_P(mmSCLK_PWRMGT_CNTL, 0, ~SCLK_PWRMGT_CNTL__FIR_FORCE_TREND_SEL_MASK); else - WREG32_P(SCLK_PWRMGT_CNTL, FIR_FORCE_TREND_SEL, ~FIR_FORCE_TREND_SEL); + WREG32_P(mmSCLK_PWRMGT_CNTL, SCLK_PWRMGT_CNTL__FIR_FORCE_TREND_SEL_MASK, ~SCLK_PWRMGT_CNTL__FIR_FORCE_TREND_SEL_MASK); if (td == R600_TD_UP) - WREG32_P(SCLK_PWRMGT_CNTL, 0, ~FIR_TREND_MODE); + WREG32_P(mmSCLK_PWRMGT_CNTL, 0, ~SCLK_PWRMGT_CNTL__FIR_TREND_MODE_MASK); if (td == R600_TD_DOWN) - WREG32_P(SCLK_PWRMGT_CNTL, FIR_TREND_MODE, ~FIR_TREND_MODE); + WREG32_P(mmSCLK_PWRMGT_CNTL, SCLK_PWRMGT_CNTL__FIR_TREND_MODE_MASK, ~SCLK_PWRMGT_CNTL__FIR_TREND_MODE_MASK); } static void si_program_tpp(struct amdgpu_device *adev) { - WREG32(CG_TPC, R600_TPC_DFLT); + WREG32(mmCG_TPC, R600_TPC_DFLT); } static void si_program_sstp(struct amdgpu_device *adev) { - WREG32(CG_SSP, (SSTU(R600_SSTU_DFLT) | SST(R600_SST_DFLT))); + WREG32(mmCG_SSP, (R600_SSTU_DFLT << CG_SSP__SSTU__SHIFT| R600_SST_DFLT << CG_SSP__SST__SHIFT)); } static void si_enable_display_gap(struct amdgpu_device *adev) { - u32 tmp = RREG32(CG_DISPLAY_GAP_CNTL); + u32 tmp = RREG32(mmCG_DISPLAY_GAP_CNTL); - tmp &= ~(DISP1_GAP_MASK | DISP2_GAP_MASK); - tmp |= (DISP1_GAP(R600_PM_DISPLAY_GAP_IGNORE) | - DISP2_GAP(R600_PM_DISPLAY_GAP_IGNORE)); + tmp &= ~(CG_DISPLAY_GAP_CNTL__DISP1_GAP_MASK | CG_DISPLAY_GAP_CNTL__DISP2_GAP_MASK); + tmp |= (R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP1_GAP__SHIFT | + R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP2_GAP__SHIFT); - tmp &= ~(DISP1_GAP_MCHG_MASK | DISP2_GAP_MCHG_MASK); - tmp |= (DISP1_GAP_MCHG(R600_PM_DISPLAY_GAP_VBLANK) | - DISP2_GAP_MCHG(R600_PM_DISPLAY_GAP_IGNORE)); - WREG32(CG_DISPLAY_GAP_CNTL, tmp); + tmp &= ~(CG_DISPLAY_GAP_CNTL__DISP1_GAP_MCHG_MASK | CG_DISPLAY_GAP_CNTL__DISP2_GAP_MCHG_MASK); + tmp |= (R600_PM_DISPLAY_GAP_VBLANK << CG_DISPLAY_GAP_CNTL__DISP1_GAP_MCHG__SHIFT | + R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP2_GAP_MCHG__SHIFT); + WREG32(mmCG_DISPLAY_GAP_CNTL, tmp); } static void si_program_vc(struct amdgpu_device 
*adev) { struct rv7xx_power_info *pi = rv770_get_pi(adev); - WREG32(CG_FTV, pi->vrc); + WREG32(mmCG_FTV, pi->vrc); } static void si_clear_vc(struct amdgpu_device *adev) { - WREG32(CG_FTV, 0); + WREG32(mmCG_FTV, 0); } static u8 si_get_ddr3_mclk_frequency_ratio(u32 memory_clock) @@ -4735,7 +4754,7 @@ static u32 si_calculate_memory_refresh_rate(struct amdgpu_device *adev, u32 dram_rows; u32 dram_refresh_rate; u32 mc_arb_rfsh_rate; - u32 tmp = (RREG32(MC_ARB_RAMCFG) & NOOFROWS_MASK) >> NOOFROWS_SHIFT; + u32 tmp = (RREG32(mmMC_ARB_RAMCFG) & MC_ARB_RAMCFG__NOOFROWS_MASK) >> MC_ARB_RAMCFG__NOOFROWS__SHIFT; if (tmp >= 4) dram_rows = 16384; @@ -4909,7 +4928,7 @@ static int si_populate_smc_initial_state(struct amdgpu_device *adev, si_populate_initial_mvdd_value(adev, &table->initialState.level.mvdd); - reg = CG_R(0xffff) | CG_L(0); + reg = 0xffff << CG_AT__CG_R__SHIFT | 0 << CG_AT__CG_L__SHIFT; table->initialState.level.aT = cpu_to_be32(reg); table->initialState.level.bSP = cpu_to_be32(pi->dsp); table->initialState.level.gen2PCIE = (u8)si_pi->boot_pcie_gen; @@ -4935,10 +4954,13 @@ static int si_populate_smc_initial_state(struct amdgpu_device *adev, table->initialState.level.dpm2.BelowSafeInc = 0; table->initialState.level.dpm2.PwrEfficiencyRatio = 0; - reg = MIN_POWER_MASK | MAX_POWER_MASK; + reg = SQ_POWER_THROTTLE__MIN_POWER_MASK | + SQ_POWER_THROTTLE__MAX_POWER_MASK; table->initialState.level.SQPowerThrottle = cpu_to_be32(reg); - reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK; + reg = SQ_POWER_THROTTLE2__MAX_POWER_DELTA_MASK | + SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE_MASK | + SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO_MASK; table->initialState.level.SQPowerThrottle_2 = cpu_to_be32(reg); return 0; @@ -5057,8 +5079,8 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev, dll_cntl &= ~(MRDCK0_BYPASS | MRDCK1_BYPASS); - spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK; - spll_func_cntl_2 |= SCLK_MUX_SEL(4); + spll_func_cntl_2 &= ~CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL_MASK; + spll_func_cntl_2 |= 4 << CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL__SHIFT; table->ACPIState.level.mclk.vDLL_CNTL = cpu_to_be32(dll_cntl); @@ -5102,10 +5124,10 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev, table->ACPIState.level.dpm2.BelowSafeInc = 0; table->ACPIState.level.dpm2.PwrEfficiencyRatio = 0; - reg = MIN_POWER_MASK | MAX_POWER_MASK; + reg = SQ_POWER_THROTTLE__MIN_POWER_MASK | SQ_POWER_THROTTLE__MAX_POWER_MASK; table->ACPIState.level.SQPowerThrottle = cpu_to_be32(reg); - reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK; + reg = SQ_POWER_THROTTLE2__MAX_POWER_DELTA_MASK | SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE_MASK | SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO_MASK; table->ACPIState.level.SQPowerThrottle_2 = cpu_to_be32(reg); return 0; @@ -5250,8 +5272,8 @@ static int si_init_smc_table(struct amdgpu_device *adev) if (ret) return ret; - WREG32(CG_ULV_CONTROL, ulv->cg_ulv_control); - WREG32(CG_ULV_PARAMETER, ulv->cg_ulv_parameter); + WREG32(mmCG_ULV_CONTROL, ulv->cg_ulv_control); + WREG32(mmCG_ULV_PARAMETER, ulv->cg_ulv_parameter); lane_width = amdgpu_get_pcie_lanes(adev); si_write_smc_soft_register(adev, SI_SMC_SOFT_REGISTER_non_ulv_pcie_link_width, lane_width); @@ -5294,16 +5316,16 @@ static int si_calculate_sclk_params(struct amdgpu_device *adev, do_div(tmp, reference_clock); fbdiv = (u32) tmp; - spll_func_cntl &= ~(SPLL_PDIV_A_MASK | SPLL_REF_DIV_MASK); - spll_func_cntl |= SPLL_REF_DIV(dividers.ref_div); - spll_func_cntl |= SPLL_PDIV_A(dividers.post_div); + 
spll_func_cntl &= ~(CG_SPLL_FUNC_CNTL__SPLL_PDIV_A_MASK | CG_SPLL_FUNC_CNTL__SPLL_REF_DIV_MASK); + spll_func_cntl |= dividers.ref_div << CG_SPLL_FUNC_CNTL__SPLL_REF_DIV__SHIFT; + spll_func_cntl |= dividers.post_div << CG_SPLL_FUNC_CNTL__SPLL_PDIV_A__SHIFT; - spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK; - spll_func_cntl_2 |= SCLK_MUX_SEL(2); + spll_func_cntl_2 &= ~CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL_MASK; + spll_func_cntl_2 |= 2 << CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL__SHIFT; - spll_func_cntl_3 &= ~SPLL_FB_DIV_MASK; - spll_func_cntl_3 |= SPLL_FB_DIV(fbdiv); - spll_func_cntl_3 |= SPLL_DITHEN; + spll_func_cntl_3 &= ~CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV_MASK; + spll_func_cntl_3 |= fbdiv << CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV__SHIFT; + spll_func_cntl_3 |= CG_SPLL_FUNC_CNTL_3__SPLL_DITHEN_MASK; if (pi->sclk_ss) { struct amdgpu_atom_ss ss; @@ -5314,12 +5336,12 @@ static int si_calculate_sclk_params(struct amdgpu_device *adev, u32 clk_s = reference_clock * 5 / (reference_divider * ss.rate); u32 clk_v = 4 * ss.percentage * fbdiv / (clk_s * 10000); - cg_spll_spread_spectrum &= ~CLK_S_MASK; - cg_spll_spread_spectrum |= CLK_S(clk_s); - cg_spll_spread_spectrum |= SSEN; + cg_spll_spread_spectrum &= ~CG_SPLL_SPREAD_SPECTRUM__CLK_S_MASK; + cg_spll_spread_spectrum |= clk_s << CG_SPLL_SPREAD_SPECTRUM__CLK_S__SHIFT; + cg_spll_spread_spectrum |= CG_SPLL_SPREAD_SPECTRUM__SSEN_MASK; - cg_spll_spread_spectrum_2 &= ~CLK_V_MASK; - cg_spll_spread_spectrum_2 |= CLK_V(clk_v); + cg_spll_spread_spectrum_2 &= ~CG_SPLL_SPREAD_SPECTRUM_2__CLK_V_MASK; + cg_spll_spread_spectrum_2 |= clk_v << CG_SPLL_SPREAD_SPECTRUM_2__CLK_V__SHIFT; } } @@ -5485,7 +5507,7 @@ static int si_convert_power_level_to_smc(struct amdgpu_device *adev, if (pi->mclk_stutter_mode_threshold && (pl->mclk <= pi->mclk_stutter_mode_threshold) && !eg_pi->uvd_enabled && - (RREG32(DPG_PIPE_STUTTER_CONTROL) & STUTTER_ENABLE) && + (RREG32(mmDPG_PIPE_STUTTER_CONTROL) & DPG_PIPE_STUTTER_CONTROL__STUTTER_ENABLE_MASK) && (adev->pm.dpm.new_active_crtc_count <= 2)) { level->mcFlags |= SISLANDS_SMC_MC_STUTTER_EN; } @@ -5579,7 +5601,7 @@ static int si_populate_smc_t(struct amdgpu_device *adev, return -EINVAL; if (state->performance_level_count < 2) { - a_t = CG_R(0xffff) | CG_L(0); + a_t = 0xffff << CG_AT__CG_R__SHIFT | 0 << CG_AT__CG_L__SHIFT; smc_state->levels[0].aT = cpu_to_be32(a_t); return 0; } @@ -5600,13 +5622,13 @@ static int si_populate_smc_t(struct amdgpu_device *adev, t_l = (i + 1) * 1000 + 50 * R600_AH_DFLT; } - a_t = be32_to_cpu(smc_state->levels[i].aT) & ~CG_R_MASK; - a_t |= CG_R(t_l * pi->bsp / 20000); + a_t = be32_to_cpu(smc_state->levels[i].aT) & ~CG_AT__CG_R_MASK; + a_t |= (t_l * pi->bsp / 20000) << CG_AT__CG_R__SHIFT; smc_state->levels[i].aT = cpu_to_be32(a_t); high_bsp = (i == state->performance_level_count - 2) ? 
pi->pbsp : pi->bsp; - a_t = CG_R(0xffff) | CG_L(t_h * high_bsp / 20000); + a_t = (0xffff) << CG_AT__CG_R__SHIFT | (t_h * high_bsp / 20000) << CG_AT__CG_L__SHIFT; smc_state->levels[i + 1].aT = cpu_to_be32(a_t); } @@ -6180,9 +6202,9 @@ static int si_upload_mc_reg_table(struct amdgpu_device *adev, static void si_enable_voltage_control(struct amdgpu_device *adev, bool enable) { if (enable) - WREG32_P(GENERAL_PWRMGT, VOLT_PWRMGT_EN, ~VOLT_PWRMGT_EN); + WREG32_P(mmGENERAL_PWRMGT, GENERAL_PWRMGT__VOLT_PWRMGT_EN_MASK, ~GENERAL_PWRMGT__VOLT_PWRMGT_EN_MASK); else - WREG32_P(GENERAL_PWRMGT, 0, ~VOLT_PWRMGT_EN); + WREG32_P(mmGENERAL_PWRMGT, 0, ~GENERAL_PWRMGT__VOLT_PWRMGT_EN_MASK); } static enum si_pcie_gen si_get_maximum_link_speed(struct amdgpu_device *adev, @@ -6204,8 +6226,8 @@ static u16 si_get_current_pcie_speed(struct amdgpu_device *adev) { u32 speed_cntl; - speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL) & LC_CURRENT_DATA_RATE_MASK; - speed_cntl >>= LC_CURRENT_DATA_RATE_SHIFT; + speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL) & PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK; + speed_cntl >>= PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; return (u16)speed_cntl; } @@ -6412,21 +6434,21 @@ static void si_dpm_setup_asic(struct amdgpu_device *adev) static int si_thermal_enable_alert(struct amdgpu_device *adev, bool enable) { - u32 thermal_int = RREG32(CG_THERMAL_INT); + u32 thermal_int = RREG32(mmCG_THERMAL_INT); if (enable) { PPSMC_Result result; - thermal_int &= ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW); - WREG32(CG_THERMAL_INT, thermal_int); + thermal_int &= ~(CG_THERMAL_INT__THERM_INT_MASK_HIGH_MASK | CG_THERMAL_INT__THERM_INT_MASK_LOW_MASK); + WREG32(mmCG_THERMAL_INT, thermal_int); result = amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_EnableThermalInterrupt); if (result != PPSMC_Result_OK) { DRM_DEBUG_KMS("Could not enable thermal interrupts.\n"); return -EINVAL; } } else { - thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW; - WREG32(CG_THERMAL_INT, thermal_int); + thermal_int |= CG_THERMAL_INT__THERM_INT_MASK_HIGH_MASK | CG_THERMAL_INT__THERM_INT_MASK_LOW_MASK; + WREG32(mmCG_THERMAL_INT, thermal_int); } return 0; @@ -6447,9 +6469,9 @@ static int si_thermal_set_temperature_range(struct amdgpu_device *adev, return -EINVAL; } - WREG32_P(CG_THERMAL_INT, DIG_THERM_INTH(high_temp / 1000), ~DIG_THERM_INTH_MASK); - WREG32_P(CG_THERMAL_INT, DIG_THERM_INTL(low_temp / 1000), ~DIG_THERM_INTL_MASK); - WREG32_P(CG_THERMAL_CTRL, DIG_THERM_DPM(high_temp / 1000), ~DIG_THERM_DPM_MASK); + WREG32_P(mmCG_THERMAL_INT, (high_temp / 1000) << CG_THERMAL_INT__DIG_THERM_INTH__SHIFT, ~CG_THERMAL_INT__DIG_THERM_INTH_MASK); + WREG32_P(mmCG_THERMAL_INT, (low_temp / 1000) << CG_THERMAL_INT__DIG_THERM_INTL__SHIFT, ~CG_THERMAL_INT__DIG_THERM_INTL_MASK); + WREG32_P(mmCG_THERMAL_CTRL, (high_temp / 1000) << CG_THERMAL_CTRL__DIG_THERM_DPM__SHIFT, ~CG_THERMAL_CTRL__DIG_THERM_DPM_MASK); adev->pm.dpm.thermal.min_temp = low_temp; adev->pm.dpm.thermal.max_temp = high_temp; @@ -6463,20 +6485,20 @@ static void si_fan_ctrl_set_static_mode(struct amdgpu_device *adev, u32 mode) u32 tmp; if (si_pi->fan_ctrl_is_in_default_mode) { - tmp = (RREG32(CG_FDO_CTRL2) & FDO_PWM_MODE_MASK) >> FDO_PWM_MODE_SHIFT; + tmp = (RREG32(mmCG_FDO_CTRL2) & CG_FDO_CTRL2__FDO_PWM_MODE_MASK) >> CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT; si_pi->fan_ctrl_default_mode = tmp; - tmp = (RREG32(CG_FDO_CTRL2) & TMIN_MASK) >> TMIN_SHIFT; + tmp = (RREG32(mmCG_FDO_CTRL2) & CG_FDO_CTRL2__TMIN_MASK) >> CG_FDO_CTRL2__TMIN__SHIFT; si_pi->t_min = tmp; 
si_pi->fan_ctrl_is_in_default_mode = false; } - tmp = RREG32(CG_FDO_CTRL2) & ~TMIN_MASK; - tmp |= TMIN(0); - WREG32(CG_FDO_CTRL2, tmp); + tmp = RREG32(mmCG_FDO_CTRL2) & ~CG_FDO_CTRL2__TMIN_MASK; + tmp |= 0 << CG_FDO_CTRL2__TMIN__SHIFT; + WREG32(mmCG_FDO_CTRL2, tmp); - tmp = RREG32(CG_FDO_CTRL2) & ~FDO_PWM_MODE_MASK; - tmp |= FDO_PWM_MODE(mode); - WREG32(CG_FDO_CTRL2, tmp); + tmp = RREG32(mmCG_FDO_CTRL2) & ~CG_FDO_CTRL2__FDO_PWM_MODE_MASK; + tmp |= mode << CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT; + WREG32(mmCG_FDO_CTRL2, tmp); } static int si_thermal_setup_fan_table(struct amdgpu_device *adev) @@ -6495,7 +6517,7 @@ static int si_thermal_setup_fan_table(struct amdgpu_device *adev) return 0; } - duty100 = (RREG32(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT; + duty100 = (RREG32(mmCG_FDO_CTRL1) & CG_FDO_CTRL1__FMAX_DUTY100_MASK) >> CG_FDO_CTRL1__FMAX_DUTY100__SHIFT; if (duty100 == 0) { adev->pm.dpm.fan.ucode_fan_control = false; @@ -6531,7 +6553,7 @@ static int si_thermal_setup_fan_table(struct amdgpu_device *adev) reference_clock) / 1600); fan_table.fdo_max = cpu_to_be16((u16)duty100); - tmp = (RREG32(CG_MULT_THERMAL_CTRL) & TEMP_SEL_MASK) >> TEMP_SEL_SHIFT; + tmp = (RREG32(mmCG_MULT_THERMAL_CTRL) & CG_MULT_THERMAL_CTRL__TEMP_SEL_MASK) >> CG_MULT_THERMAL_CTRL__TEMP_SEL__SHIFT; fan_table.temp_src = (uint8_t)tmp; ret = amdgpu_si_copy_bytes_to_smc(adev, @@ -6590,8 +6612,8 @@ static int si_dpm_get_fan_speed_pwm(void *handle, if (adev->pm.no_fan) return -ENOENT; - duty100 = (RREG32(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT; - duty = (RREG32(CG_THERMAL_STATUS) & FDO_PWM_DUTY_MASK) >> FDO_PWM_DUTY_SHIFT; + duty100 = (RREG32(mmCG_FDO_CTRL1) & CG_FDO_CTRL1__FMAX_DUTY100_MASK) >> CG_FDO_CTRL1__FMAX_DUTY100__SHIFT; + duty = (RREG32(mmCG_THERMAL_STATUS) & CG_THERMAL_STATUS__FDO_PWM_DUTY_MASK) >> CG_THERMAL_STATUS__FDO_PWM_DUTY__SHIFT; if (duty100 == 0) return -EINVAL; @@ -6621,7 +6643,7 @@ static int si_dpm_set_fan_speed_pwm(void *handle, if (speed > 255) return -EINVAL; - duty100 = (RREG32(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT; + duty100 = (RREG32(mmCG_FDO_CTRL1) & CG_FDO_CTRL1__FMAX_DUTY100_MASK) >> CG_FDO_CTRL1__FMAX_DUTY100__SHIFT; if (duty100 == 0) return -EINVAL; @@ -6630,9 +6652,9 @@ static int si_dpm_set_fan_speed_pwm(void *handle, do_div(tmp64, 255); duty = (u32)tmp64; - tmp = RREG32(CG_FDO_CTRL0) & ~FDO_STATIC_DUTY_MASK; - tmp |= FDO_STATIC_DUTY(duty); - WREG32(CG_FDO_CTRL0, tmp); + tmp = RREG32(mmCG_FDO_CTRL0) & ~CG_FDO_CTRL0__FDO_STATIC_DUTY_MASK; + tmp |= duty << CG_FDO_CTRL0__FDO_STATIC_DUTY__SHIFT; + WREG32(mmCG_FDO_CTRL0, tmp); return 0; } @@ -6672,8 +6694,8 @@ static int si_dpm_get_fan_control_mode(void *handle, u32 *fan_mode) if (si_pi->fan_is_controlled_by_smc) return 0; - tmp = RREG32(CG_FDO_CTRL2) & FDO_PWM_MODE_MASK; - *fan_mode = (tmp >> FDO_PWM_MODE_SHIFT); + tmp = RREG32(mmCG_FDO_CTRL2) & CG_FDO_CTRL2__FDO_PWM_MODE_MASK; + *fan_mode = (tmp >> CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT); return 0; } @@ -6691,7 +6713,7 @@ static int si_fan_ctrl_get_fan_speed_rpm(struct amdgpu_device *adev, if (adev->pm.fan_pulses_per_revolution == 0) return -ENOENT; - tach_period = (RREG32(CG_TACH_STATUS) & TACH_PERIOD_MASK) >> TACH_PERIOD_SHIFT; + tach_period = (RREG32(mmCG_TACH_STATUS) & CG_TACH_STATUS__TACH_PERIOD_MASK) >> CG_TACH_STATUS__TACH_PERIOD__SHIFT; if (tach_period == 0) return -ENOENT; @@ -6720,9 +6742,9 @@ static int si_fan_ctrl_set_fan_speed_rpm(struct amdgpu_device *adev, si_fan_ctrl_stop_smc_fan_control(adev); tach_period = 60 * xclk * 10000 / (8 * 
speed); - tmp = RREG32(CG_TACH_CTRL) & ~TARGET_PERIOD_MASK; - tmp |= TARGET_PERIOD(tach_period); - WREG32(CG_TACH_CTRL, tmp); + tmp = RREG32(mmCG_TACH_CTRL) & ~CG_TACH_CTRL__TARGET_PERIOD_MASK; + tmp |= tach_period << CG_TACH_CTRL__TARGET_PERIOD__SHIFT; + WREG32(mmCG_TACH_CTRL, tmp); si_fan_ctrl_set_static_mode(adev, FDO_PWM_MODE_STATIC_RPM); @@ -6736,13 +6758,13 @@ static void si_fan_ctrl_set_default_mode(struct amdgpu_device *adev) u32 tmp; if (!si_pi->fan_ctrl_is_in_default_mode) { - tmp = RREG32(CG_FDO_CTRL2) & ~FDO_PWM_MODE_MASK; - tmp |= FDO_PWM_MODE(si_pi->fan_ctrl_default_mode); - WREG32(CG_FDO_CTRL2, tmp); + tmp = RREG32(mmCG_FDO_CTRL2) & ~CG_FDO_CTRL2__FDO_PWM_MODE_MASK; + tmp |= si_pi->fan_ctrl_default_mode << CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT; + WREG32(mmCG_FDO_CTRL2, tmp); - tmp = RREG32(CG_FDO_CTRL2) & ~TMIN_MASK; - tmp |= TMIN(si_pi->t_min); - WREG32(CG_FDO_CTRL2, tmp); + tmp = RREG32(mmCG_FDO_CTRL2) & ~CG_FDO_CTRL2__TMIN_MASK; + tmp |= si_pi->t_min << CG_FDO_CTRL2__TMIN__SHIFT; + WREG32(mmCG_FDO_CTRL2, tmp); si_pi->fan_ctrl_is_in_default_mode = true; } } @@ -6760,14 +6782,14 @@ static void si_thermal_initialize(struct amdgpu_device *adev) u32 tmp; if (adev->pm.fan_pulses_per_revolution) { - tmp = RREG32(CG_TACH_CTRL) & ~EDGE_PER_REV_MASK; - tmp |= EDGE_PER_REV(adev->pm.fan_pulses_per_revolution -1); - WREG32(CG_TACH_CTRL, tmp); + tmp = RREG32(mmCG_TACH_CTRL) & ~CG_TACH_CTRL__EDGE_PER_REV_MASK; + tmp |= (adev->pm.fan_pulses_per_revolution -1) << CG_TACH_CTRL__EDGE_PER_REV__SHIFT; + WREG32(mmCG_TACH_CTRL, tmp); } - tmp = RREG32(CG_FDO_CTRL2) & ~TACH_PWM_RESP_RATE_MASK; - tmp |= TACH_PWM_RESP_RATE(0x28); - WREG32(CG_FDO_CTRL2, tmp); + tmp = RREG32(mmCG_FDO_CTRL2) & ~CG_FDO_CTRL2__TACH_PWM_RESP_RATE_MASK; + tmp |= 0x28 << CG_FDO_CTRL2__TACH_PWM_RESP_RATE__SHIFT; + WREG32(mmCG_FDO_CTRL2, tmp); } static int si_thermal_start_thermal_controller(struct amdgpu_device *adev) @@ -7530,8 +7552,8 @@ static void si_dpm_debugfs_print_current_performance_level(void *handle, struct si_ps *ps = si_get_ps(rps); struct rv7xx_pl *pl; u32 current_index = - (RREG32(TARGET_AND_CURRENT_PROFILE_INDEX) & CURRENT_STATE_INDEX_MASK) >> - CURRENT_STATE_INDEX_SHIFT; + (RREG32(mmTARGET_AND_CURRENT_PROFILE_INDEX) & TARGET_AND_CURRENT_PROFILE_INDEX__CURRENT_STATE_INDEX_MASK) >> + TARGET_AND_CURRENT_PROFILE_INDEX__CURRENT_STATE_INDEX__SHIFT; if (current_index >= ps->performance_level_count) { seq_printf(m, "invalid dpm profile %d\n", current_index); @@ -7554,14 +7576,14 @@ static int si_dpm_set_interrupt_state(struct amdgpu_device *adev, case AMDGPU_THERMAL_IRQ_LOW_TO_HIGH: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - cg_thermal_int = RREG32_SMC(CG_THERMAL_INT); - cg_thermal_int |= THERM_INT_MASK_HIGH; - WREG32_SMC(CG_THERMAL_INT, cg_thermal_int); + cg_thermal_int = RREG32_SMC(mmCG_THERMAL_INT); + cg_thermal_int |= CG_THERMAL_INT__THERM_INT_MASK_HIGH_MASK; + WREG32_SMC(mmCG_THERMAL_INT, cg_thermal_int); break; case AMDGPU_IRQ_STATE_ENABLE: - cg_thermal_int = RREG32_SMC(CG_THERMAL_INT); - cg_thermal_int &= ~THERM_INT_MASK_HIGH; - WREG32_SMC(CG_THERMAL_INT, cg_thermal_int); + cg_thermal_int = RREG32_SMC(mmCG_THERMAL_INT); + cg_thermal_int &= ~CG_THERMAL_INT__THERM_INT_MASK_HIGH_MASK; + WREG32_SMC(mmCG_THERMAL_INT, cg_thermal_int); break; default: break; @@ -7571,14 +7593,14 @@ static int si_dpm_set_interrupt_state(struct amdgpu_device *adev, case AMDGPU_THERMAL_IRQ_HIGH_TO_LOW: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - cg_thermal_int = RREG32_SMC(CG_THERMAL_INT); - cg_thermal_int |= 
THERM_INT_MASK_LOW; - WREG32_SMC(CG_THERMAL_INT, cg_thermal_int); + cg_thermal_int = RREG32_SMC(mmCG_THERMAL_INT); + cg_thermal_int |= CG_THERMAL_INT__THERM_INT_MASK_LOW_MASK; + WREG32_SMC(mmCG_THERMAL_INT, cg_thermal_int); break; case AMDGPU_IRQ_STATE_ENABLE: - cg_thermal_int = RREG32_SMC(CG_THERMAL_INT); - cg_thermal_int &= ~THERM_INT_MASK_LOW; - WREG32_SMC(CG_THERMAL_INT, cg_thermal_int); + cg_thermal_int = RREG32_SMC(mmCG_THERMAL_INT); + cg_thermal_int &= ~CG_THERMAL_INT__THERM_INT_MASK_LOW_MASK; + WREG32_SMC(mmCG_THERMAL_INT, cg_thermal_int); break; default: break; @@ -7883,8 +7905,8 @@ static int si_dpm_get_temp(void *handle) int actual_temp = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >> - CTF_TEMP_SHIFT; + temp = (RREG32(mmCG_MULT_THERMAL_STATUS) & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> + CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; if (temp & 0x200) actual_temp = 255; @@ -8014,8 +8036,8 @@ static int si_dpm_read_sensor(void *handle, int idx, struct si_ps *ps = si_get_ps(rps); uint32_t sclk, mclk; u32 pl_index = - (RREG32(TARGET_AND_CURRENT_PROFILE_INDEX) & CURRENT_STATE_INDEX_MASK) >> - CURRENT_STATE_INDEX_SHIFT; + (RREG32(mmTARGET_AND_CURRENT_PROFILE_INDEX) & TARGET_AND_CURRENT_PROFILE_INDEX__CURRENT_STATE_INDEX_MASK) >> + TARGET_AND_CURRENT_PROFILE_INDEX__CURRENT_STATE_INDEX__SHIFT; /* size must be at least 4 bytes for all sensors */ if (*size < 4) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c index 8f994ffa9cd1..4e65ab9e931c 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c @@ -30,6 +30,12 @@ #include "amdgpu_ucode.h" #include "sislands_smc.h" +#include "smu/smu_6_0_d.h" +#include "smu/smu_6_0_sh_mask.h" + +#include "gca/gfx_6_0_d.h" +#include "gca/gfx_6_0_sh_mask.h" + static int si_set_smc_sram_address(struct amdgpu_device *adev, u32 smc_address, u32 limit) { @@ -38,8 +44,8 @@ static int si_set_smc_sram_address(struct amdgpu_device *adev, if ((smc_address + 3) > limit) return -EINVAL; - WREG32(SMC_IND_INDEX_0, smc_address); - WREG32_P(SMC_IND_ACCESS_CNTL, 0, ~AUTO_INCREMENT_IND_0); + WREG32(mmSMC_IND_INDEX_0, smc_address); + WREG32_P(mmSMC_IND_ACCESS_CNTL, 0, ~SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK); return 0; } @@ -68,7 +74,7 @@ int amdgpu_si_copy_bytes_to_smc(struct amdgpu_device *adev, if (ret) goto done; - WREG32(SMC_IND_DATA_0, data); + WREG32(mmSMC_IND_DATA_0, data); src += 4; byte_count -= 4; @@ -83,7 +89,7 @@ int amdgpu_si_copy_bytes_to_smc(struct amdgpu_device *adev, if (ret) goto done; - original_data = RREG32(SMC_IND_DATA_0); + original_data = RREG32(mmSMC_IND_DATA_0); extra_shift = 8 * (4 - byte_count); while (byte_count > 0) { @@ -99,7 +105,7 @@ int amdgpu_si_copy_bytes_to_smc(struct amdgpu_device *adev, if (ret) goto done; - WREG32(SMC_IND_DATA_0, data); + WREG32(mmSMC_IND_DATA_0, data); } done: @@ -121,10 +127,10 @@ void amdgpu_si_reset_smc(struct amdgpu_device *adev) { u32 tmp; - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); + RREG32(mmCB_CGTT_SCLK_CTRL); + RREG32(mmCB_CGTT_SCLK_CTRL); + RREG32(mmCB_CGTT_SCLK_CTRL); + RREG32(mmCB_CGTT_SCLK_CTRL); tmp = RREG32_SMC(SMC_SYSCON_RESET_CNTL) | RST_REG; @@ -170,16 +176,16 @@ PPSMC_Result amdgpu_si_send_msg_to_smc(struct amdgpu_device *adev, if (!amdgpu_si_is_smc_running(adev)) return PPSMC_Result_Failed; - WREG32(SMC_MESSAGE_0, msg); + WREG32(mmSMC_MESSAGE_0, 
msg); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(SMC_RESP_0); + tmp = RREG32(mmSMC_RESP_0); if (tmp != 0) break; udelay(1); } - return (PPSMC_Result)RREG32(SMC_RESP_0); + return (PPSMC_Result)RREG32(mmSMC_RESP_0); } PPSMC_Result amdgpu_si_wait_for_smc_inactive(struct amdgpu_device *adev) @@ -225,18 +231,18 @@ int amdgpu_si_load_smc_ucode(struct amdgpu_device *adev, u32 limit) return -EINVAL; spin_lock_irqsave(&adev->smc_idx_lock, flags); - WREG32(SMC_IND_INDEX_0, ucode_start_address); - WREG32_P(SMC_IND_ACCESS_CNTL, AUTO_INCREMENT_IND_0, ~AUTO_INCREMENT_IND_0); + WREG32(mmSMC_IND_INDEX_0, ucode_start_address); + WREG32_P(mmSMC_IND_ACCESS_CNTL, SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK, ~SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK); while (ucode_size >= 4) { /* SMC address space is BE */ data = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | src[3]; - WREG32(SMC_IND_DATA_0, data); + WREG32(mmSMC_IND_DATA_0, data); src += 4; ucode_size -= 4; } - WREG32_P(SMC_IND_ACCESS_CNTL, 0, ~AUTO_INCREMENT_IND_0); + WREG32_P(mmSMC_IND_ACCESS_CNTL, 0, ~SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK); spin_unlock_irqrestore(&adev->smc_idx_lock, flags); return 0; @@ -251,7 +257,7 @@ int amdgpu_si_read_smc_sram_dword(struct amdgpu_device *adev, u32 smc_address, spin_lock_irqsave(&adev->smc_idx_lock, flags); ret = si_set_smc_sram_address(adev, smc_address, limit); if (ret == 0) - *value = RREG32(SMC_IND_DATA_0); + *value = RREG32(mmSMC_IND_DATA_0); spin_unlock_irqrestore(&adev->smc_idx_lock, flags); return ret; @@ -266,7 +272,7 @@ int amdgpu_si_write_smc_sram_dword(struct amdgpu_device *adev, u32 smc_address, spin_lock_irqsave(&adev->smc_idx_lock, flags); ret = si_set_smc_sram_address(adev, smc_address, limit); if (ret == 0) - WREG32(SMC_IND_DATA_0, value); + WREG32(mmSMC_IND_DATA_0, value); spin_unlock_irqrestore(&adev->smc_idx_lock, flags); return ret; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c index 4bd92fd782be..8d40ed0f0e83 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c @@ -143,6 +143,10 @@ int atomctrl_initialize_mc_reg_table( vram_info = (ATOM_VRAM_INFO_HEADER_V2_1 *) smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, VRAM_Info), &size, &frev, &crev); + if (!vram_info) { + pr_err("Could not retrieve the VramInfo table!"); + return -EINVAL; + } if (module_index >= vram_info->ucNumOfVRAMModule) { pr_err("Invalid VramInfo table."); @@ -180,6 +184,10 @@ int atomctrl_initialize_mc_reg_table_v2_2( vram_info = (ATOM_VRAM_INFO_HEADER_V2_2 *) smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, VRAM_Info), &size, &frev, &crev); + if (!vram_info) { + pr_err("Could not retrieve the VramInfo table!"); + return -EINVAL; + } if (module_index >= vram_info->ucNumOfVRAMModule) { pr_err("Invalid VramInfo table."); diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c index 5a010cd38303..baf51cd82a35 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c @@ -46,42 +46,6 @@ static int smu7_set_smc_sram_address(struct pp_hwmgr *hwmgr, uint32_t smc_addr, } -int smu7_copy_bytes_from_smc(struct pp_hwmgr *hwmgr, uint32_t smc_start_address, uint32_t *dest, uint32_t byte_count, uint32_t limit) -{ - uint32_t data; - uint32_t addr; - uint8_t *dest_byte; - uint8_t i, data_byte[4] = {0}; - 
uint32_t *pdata = (uint32_t *)&data_byte; - - PP_ASSERT_WITH_CODE((0 == (3 & smc_start_address)), "SMC address must be 4 byte aligned.", return -EINVAL); - PP_ASSERT_WITH_CODE((limit > (smc_start_address + byte_count)), "SMC address is beyond the SMC RAM area.", return -EINVAL); - - addr = smc_start_address; - - while (byte_count >= 4) { - smu7_read_smc_sram_dword(hwmgr, addr, &data, limit); - - *dest = PP_SMC_TO_HOST_UL(data); - - dest += 1; - byte_count -= 4; - addr += 4; - } - - if (byte_count) { - smu7_read_smc_sram_dword(hwmgr, addr, &data, limit); - *pdata = PP_SMC_TO_HOST_UL(data); - /* Cast dest into byte type in dest_byte. This way, we don't overflow if the allocated memory is not 4-byte aligned. */ - dest_byte = (uint8_t *)dest; - for (i = 0; i < byte_count; i++) - dest_byte[i] = data_byte[i]; - } - - return 0; -} - - int smu7_copy_bytes_to_smc(struct pp_hwmgr *hwmgr, uint32_t smc_start_address, const uint8_t *src, uint32_t byte_count, uint32_t limit) { diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.h b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.h index e7303dc8c260..63e428ceaee4 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.h +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.h @@ -53,8 +53,6 @@ struct smu7_smumgr { }; -int smu7_copy_bytes_from_smc(struct pp_hwmgr *hwmgr, uint32_t smc_start_address, - uint32_t *dest, uint32_t byte_count, uint32_t limit); int smu7_copy_bytes_to_smc(struct pp_hwmgr *hwmgr, uint32_t smc_start_address, const uint8_t *src, uint32_t byte_count, uint32_t limit); int smu7_program_jump_on_start(struct pp_hwmgr *hwmgr); diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 46cce1d2aaf3..f345c233bc47 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -3445,6 +3445,19 @@ bool smu_mode2_reset_is_support(struct smu_context *smu) return ret; } +bool smu_link_reset_is_support(struct smu_context *smu) +{ + bool ret = false; + + if (!smu->pm_enabled) + return false; + + if (smu->ppt_funcs && smu->ppt_funcs->link_reset_is_support) + ret = smu->ppt_funcs->link_reset_is_support(smu); + + return ret; +} + int smu_mode1_reset(struct smu_context *smu) { int ret = 0; @@ -3475,6 +3488,19 @@ static int smu_mode2_reset(void *handle) return ret; } +int smu_link_reset(struct smu_context *smu) +{ + int ret = 0; + + if (!smu->pm_enabled) + return -EOPNOTSUPP; + + if (smu->ppt_funcs->link_reset) + ret = smu->ppt_funcs->link_reset(smu); + + return ret; +} + static int smu_enable_gfx_features(void *handle) { struct smu_context *smu = handle; @@ -3975,3 +4001,11 @@ int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask) return ret; } + +int smu_reset_vcn(struct smu_context *smu, uint32_t inst_mask) +{ + if (smu->ppt_funcs && smu->ppt_funcs->dpm_reset_vcn) + smu->ppt_funcs->dpm_reset_vcn(smu, inst_mask); + + return 0; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index dd6d0e7aa242..a95a3dd5a895 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -438,9 +438,11 @@ struct mclock_latency_table { }; enum smu_reset_mode { - SMU_RESET_MODE_0, - SMU_RESET_MODE_1, - SMU_RESET_MODE_2, + SMU_RESET_MODE_0, + SMU_RESET_MODE_1, + SMU_RESET_MODE_2, + SMU_RESET_MODE_3, + SMU_RESET_MODE_4, }; enum smu_baco_state { @@ -1229,12 +1231,18 @@ struct pptable_funcs { * @mode1_reset_is_support: Check if GPU supports 
mode1 reset. */ bool (*mode1_reset_is_support)(struct smu_context *smu); + /** * @mode2_reset_is_support: Check if GPU supports mode2 reset. */ bool (*mode2_reset_is_support)(struct smu_context *smu); /** + * @link_reset_is_support: Check if GPU supports link reset. + */ + bool (*link_reset_is_support)(struct smu_context *smu); + + /** * @mode1_reset: Perform mode1 reset. * * Complete GPU reset. @@ -1252,6 +1260,13 @@ struct pptable_funcs { int (*enable_gfx_features)(struct smu_context *smu); /** + * @link_reset: Perform link reset. + * + * The gfx device driver reset + */ + int (*link_reset)(struct smu_context *smu); + + /** * @get_dpm_ultimate_freq: Get the hard frequency range of a clock * domain in MHz. */ @@ -1383,6 +1398,11 @@ struct pptable_funcs { bool (*reset_sdma_is_supported)(struct smu_context *smu); /** + * @reset_vcn: message SMU to soft reset vcn instance. + */ + int (*dpm_reset_vcn)(struct smu_context *smu, uint32_t inst_mask); + + /** * @get_ecc_table: message SMU to get ECC INFO table. */ ssize_t (*get_ecc_info)(struct smu_context *smu, void *table); @@ -1602,7 +1622,9 @@ int smu_get_power_limit(void *handle, bool smu_mode1_reset_is_support(struct smu_context *smu); bool smu_mode2_reset_is_support(struct smu_context *smu); +bool smu_link_reset_is_support(struct smu_context *smu); int smu_mode1_reset(struct smu_context *smu); +int smu_link_reset(struct smu_context *smu); extern const struct amd_ip_funcs smu_ip_funcs; @@ -1643,6 +1665,7 @@ int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size); int smu_send_rma_reason(struct smu_context *smu); int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask); bool smu_reset_sdma_is_supported(struct smu_context *smu); +int smu_reset_vcn(struct smu_context *smu, uint32_t inst_mask); int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type, int level); ssize_t smu_get_pm_policy_info(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h index d26f35119a12..3d9e5e967c94 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h @@ -459,4 +459,11 @@ typedef struct __attribute__((packed, aligned(4))) { uint64_t AccGfxclkBelowHostLimit; } VfMetricsTable_t; +#pragma pack(push, 4) +typedef struct { + // Telemetry + uint32_t InputTelemetryVoltageInmV; +} StaticMetricsTable_t; +#pragma pack(pop) + #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h index 288b2576432b..41f268313613 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h @@ -94,7 +94,9 @@ #define PPSMC_MSG_RmaDueToBadPageThreshold 0x43 #define PPSMC_MSG_SetThrottlingPolicy 0x44 #define PPSMC_MSG_ResetSDMA 0x4D -#define PPSMC_Message_Count 0x4E +#define PPSMC_MSG_ResetVCN 0x4E +#define PPSMC_MSG_GetStaticMetricsTable 0x59 +#define PPSMC_Message_Count 0x5A //PPSMC Reset Types for driver msg argument #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index c9dee09395e3..eefdaa0b5df6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -277,6 +277,7 @@ __SMU_DUMMY_MAP(MALLPowerController), \ __SMU_DUMMY_MAP(MALLPowerState), \ 
__SMU_DUMMY_MAP(ResetSDMA), \ + __SMU_DUMMY_MAP(ResetVCN), \ __SMU_DUMMY_MAP(GetStaticMetricsTable), #undef __SMU_DUMMY_MAP diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h index ed8304d82831..56ae555bb52a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h @@ -281,11 +281,6 @@ int smu_v11_0_set_single_dpm_table(struct smu_context *smu, enum smu_clk_type clk_type, struct smu_11_0_dpm_table *single_dpm_table); -int smu_v11_0_get_dpm_level_range(struct smu_context *smu, - enum smu_clk_type clk_type, - uint32_t *min_value, - uint32_t *max_value); - int smu_v11_0_get_current_pcie_link_width_level(struct smu_context *smu); uint16_t smu_v11_0_get_current_pcie_link_width(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index cd03caffe317..c22ed28c0cdb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -112,6 +112,7 @@ struct smu_13_0_dpm_context { uint32_t workload_policy_mask; uint32_t dcef_min_ds_clk; uint64_t caps; + uint32_t board_volt; }; enum smu_13_0_power_state { @@ -183,13 +184,6 @@ int smu_v13_0_disable_thermal_alert(struct smu_context *smu); int smu_v13_0_get_gfx_vdd(struct smu_context *smu, uint32_t *value); -int smu_v13_0_set_min_deep_sleep_dcefclk(struct smu_context *smu, uint32_t clk); - -int -smu_v13_0_display_clock_voltage_request(struct smu_context *smu, - struct pp_display_clock_request - *clock_req); - uint32_t smu_v13_0_get_fan_control_mode(struct smu_context *smu); @@ -226,11 +220,6 @@ int smu_v13_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type c int smu_v13_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max, bool automatic); -int smu_v13_0_set_hard_freq_limited_range(struct smu_context *smu, - enum smu_clk_type clk_type, - uint32_t min, - uint32_t max); - int smu_v13_0_set_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level); @@ -310,6 +299,7 @@ int smu_v13_0_get_boot_freq_by_index(struct smu_context *smu, uint32_t *value); void smu_v13_0_interrupt_work(struct smu_context *smu); +void smu_v13_0_reset_custom_level(struct smu_context *smu); bool smu_v13_0_12_is_dpm_running(struct smu_context *smu); int smu_v13_0_12_get_max_metrics_size(void); int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 453952cdc353..9ad46f545d15 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1347,7 +1347,7 @@ static int arcturus_get_power_limit(struct smu_context *smu, *default_power_limit = power_limit; if (max_power_limit) *max_power_limit = power_limit; - /** + /* * No lower bound is imposed on the limit. Any unreasonable limit set * will result in frequent throttling. 
*/ diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 25fabf336a64..78e4186d06cc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -2059,45 +2059,6 @@ int smu_v11_0_set_single_dpm_table(struct smu_context *smu, return 0; } -int smu_v11_0_get_dpm_level_range(struct smu_context *smu, - enum smu_clk_type clk_type, - uint32_t *min_value, - uint32_t *max_value) -{ - uint32_t level_count = 0; - int ret = 0; - - if (!min_value && !max_value) - return -EINVAL; - - if (min_value) { - /* by default, level 0 clock value as min value */ - ret = smu_v11_0_get_dpm_freq_by_index(smu, - clk_type, - 0, - min_value); - if (ret) - return ret; - } - - if (max_value) { - ret = smu_v11_0_get_dpm_level_count(smu, - clk_type, - &level_count); - if (ret) - return ret; - - ret = smu_v11_0_get_dpm_freq_by_index(smu, - clk_type, - level_count - 1, - max_value); - if (ret) - return ret; - } - - return ret; -} - int smu_v11_0_get_current_pcie_link_width_level(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 83163d7c7f00..5cb3b9bb6089 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1270,6 +1270,7 @@ static int aldebaran_set_performance_level(struct smu_context *smu, struct smu_13_0_dpm_table *gfx_table = &dpm_context->dpm_tables.gfx_table; struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; + int r; /* Disable determinism if switching to another mode */ if ((smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) && @@ -1282,7 +1283,11 @@ static int aldebaran_set_performance_level(struct smu_context *smu, case AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM: return 0; - + case AMD_DPM_FORCED_LEVEL_AUTO: + r = smu_v13_0_set_performance_level(smu, level); + if (!r) + smu_v13_0_reset_custom_level(smu); + return r; case AMD_DPM_FORCED_LEVEL_HIGH: case AMD_DPM_FORCED_LEVEL_LOW: case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: @@ -1423,7 +1428,11 @@ static int aldebaran_usr_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_ min_clk = dpm_context->dpm_tables.gfx_table.min; max_clk = dpm_context->dpm_tables.gfx_table.max; - return aldebaran_set_soft_freq_limited_range(smu, SMU_GFXCLK, min_clk, max_clk, false); + ret = aldebaran_set_soft_freq_limited_range( + smu, SMU_GFXCLK, min_clk, max_clk, false); + if (ret) + return ret; + smu_v13_0_reset_custom_level(smu); } break; case PP_OD_COMMIT_DPM_TABLE: diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index ba5a9012dbd5..45b5ab423844 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -709,18 +709,6 @@ int smu_v13_0_notify_memory_pool_location(struct smu_context *smu) return ret; } -int smu_v13_0_set_min_deep_sleep_dcefclk(struct smu_context *smu, uint32_t clk) -{ - int ret; - - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetMinDeepSleepDcefclk, clk, NULL); - if (ret) - dev_err(smu->adev->dev, "SMU13 attempt to set divider for DCEFCLK Failed!"); - - return ret; -} - int smu_v13_0_set_driver_table_location(struct smu_context *smu) { struct smu_table *driver_table = &smu->smu_table.driver_table; @@ -1073,56 +1061,6 @@ int smu_v13_0_get_gfx_vdd(struct smu_context *smu, uint32_t *value) } -int 
-smu_v13_0_display_clock_voltage_request(struct smu_context *smu, - struct pp_display_clock_request - *clock_req) -{ - enum amd_pp_clock_type clk_type = clock_req->clock_type; - int ret = 0; - enum smu_clk_type clk_select = 0; - uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000; - - if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT) || - smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT)) { - switch (clk_type) { - case amd_pp_dcef_clock: - clk_select = SMU_DCEFCLK; - break; - case amd_pp_disp_clock: - clk_select = SMU_DISPCLK; - break; - case amd_pp_pixel_clock: - clk_select = SMU_PIXCLK; - break; - case amd_pp_phy_clock: - clk_select = SMU_PHYCLK; - break; - case amd_pp_mem_clock: - clk_select = SMU_UCLK; - break; - default: - dev_info(smu->adev->dev, "[%s] Invalid Clock Type!", __func__); - ret = -EINVAL; - break; - } - - if (ret) - goto failed; - - if (clk_select == SMU_UCLK && smu->disable_uclk_switch) - return 0; - - ret = smu_v13_0_set_hard_freq_limited_range(smu, clk_select, clk_freq, 0); - - if (clk_select == SMU_UCLK) - smu->hard_min_uclk_req_from_dal = clk_freq; - } - -failed: - return ret; -} - uint32_t smu_v13_0_get_fan_control_mode(struct smu_context *smu) { if (!smu_cmn_feature_is_enabled(smu, SMU_FEATURE_FAN_CONTROL_BIT)) @@ -1647,45 +1585,6 @@ out: return ret; } -int smu_v13_0_set_hard_freq_limited_range(struct smu_context *smu, - enum smu_clk_type clk_type, - uint32_t min, - uint32_t max) -{ - int ret = 0, clk_id = 0; - uint32_t param; - - if (min <= 0 && max <= 0) - return -EINVAL; - - if (!smu_cmn_clk_dpm_is_enabled(smu, clk_type)) - return 0; - - clk_id = smu_cmn_to_asic_specific_index(smu, - CMN2ASIC_MAPPING_CLK, - clk_type); - if (clk_id < 0) - return clk_id; - - if (max > 0) { - param = (uint32_t)((clk_id << 16) | (max & 0xffff)); - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMaxByFreq, - param, NULL); - if (ret) - return ret; - } - - if (min > 0) { - param = (uint32_t)((clk_id << 16) | (min & 0xffff)); - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinByFreq, - param, NULL); - if (ret) - return ret; - } - - return ret; -} - int smu_v13_0_set_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level) { @@ -2595,3 +2494,13 @@ int smu_v13_0_set_wbrf_exclusion_ranges(struct smu_context *smu, return ret; } + +void smu_v13_0_reset_custom_level(struct smu_context *smu) +{ + struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; + + pstate_table->uclk_pstate.custom.min = 0; + pstate_table->uclk_pstate.custom.max = 0; + pstate_table->gfxclk_pstate.custom.min = 0; + pstate_table->gfxclk_pstate.custom.max = 0; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c index 238bd71baa6d..533d58e57d05 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c @@ -187,26 +187,6 @@ int smu_v13_0_12_get_max_metrics_size(void) return max(sizeof(StaticMetricsTable_t), sizeof(MetricsTable_t)); } -static int smu_v13_0_12_get_static_metrics_table(struct smu_context *smu) -{ - struct smu_table_context *smu_table = &smu->smu_table; - uint32_t table_size = smu_table->tables[SMU_TABLE_SMU_METRICS].size; - struct smu_table *table = &smu_table->driver_table; - int ret; - - ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetStaticMetricsTable, NULL); - if (ret) { - dev_info(smu->adev->dev, - "Failed to export static metrics table!\n"); - return ret; - } - - 
amdgpu_asic_invalidate_hdp(smu->adev, NULL); - memcpy(smu_table->metrics_table, table->cpu_addr, table_size); - - return 0; -} - int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; @@ -217,7 +197,7 @@ int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu) int ret, i; if (!pptable->Init) { - ret = smu_v13_0_12_get_static_metrics_table(smu); + ret = smu_v13_0_6_get_static_metrics_table(smu); if (ret) return ret; @@ -345,8 +325,8 @@ int smu_v13_0_12_get_smu_metrics_data(struct smu_context *smu, ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table) { struct smu_table_context *smu_table = &smu->smu_table; - struct gpu_metrics_v1_7 *gpu_metrics = - (struct gpu_metrics_v1_7 *)smu_table->gpu_metrics_table; + struct gpu_metrics_v1_8 *gpu_metrics = + (struct gpu_metrics_v1_8 *)smu_table->gpu_metrics_table; int ret = 0, xcc_id, inst, i, j, k, idx; struct amdgpu_device *adev = smu->adev; u8 num_jpeg_rings_gpu_metrics; @@ -357,7 +337,7 @@ ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table) metrics = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL); memcpy(metrics, smu_table->metrics_table, sizeof(MetricsTable_t)); - smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 7); + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 8); gpu_metrics->temperature_hotspot = SMUQ10_ROUND(metrics->MaxSocketTemperature); @@ -474,6 +454,16 @@ ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table) SMUQ10_ROUND(metrics->GfxBusy[inst]); gpu_metrics->xcp_stats[i].gfx_busy_acc[idx] = SMUQ10_ROUND(metrics->GfxBusyAcc[inst]); + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(HST_LIMIT_METRICS))) { + gpu_metrics->xcp_stats[i].gfx_below_host_limit_ppt_acc[idx] = + SMUQ10_ROUND(metrics->GfxclkBelowHostLimitPptAcc[inst]); + gpu_metrics->xcp_stats[i].gfx_below_host_limit_thm_acc[idx] = + SMUQ10_ROUND(metrics->GfxclkBelowHostLimitThmAcc[inst]); + gpu_metrics->xcp_stats[i].gfx_low_utilization_acc[idx] = + SMUQ10_ROUND(metrics->GfxclkLowUtilizationAcc[inst]); + gpu_metrics->xcp_stats[i].gfx_below_host_limit_total_acc[idx] = + SMUQ10_ROUND(metrics->GfxclkBelowHostLimitTotalAcc[inst]); + } idx++; } } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index c478b3be37af..b6e5da7b06ef 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -101,24 +101,6 @@ MODULE_FIRMWARE("amdgpu/smu_13_0_14.bin"); #define MCA_BANK_IPID(_ip, _hwid, _type) \ [AMDGPU_MCA_IP_##_ip] = { .hwid = _hwid, .mcatype = _type, } -#define SMU_CAP(x) SMU_13_0_6_CAPS_##x - -enum smu_v13_0_6_caps { - SMU_CAP(DPM), - SMU_CAP(DPM_POLICY), - SMU_CAP(OTHER_END_METRICS), - SMU_CAP(SET_UCLK_MAX), - SMU_CAP(PCIE_METRICS), - SMU_CAP(MCA_DEBUG_MODE), - SMU_CAP(PER_INST_METRICS), - SMU_CAP(CTF_LIMIT), - SMU_CAP(RMA_MSG), - SMU_CAP(ACA_SYND), - SMU_CAP(SDMA_RESET), - SMU_CAP(STATIC_METRICS), - SMU_CAP(ALL), -}; - struct mca_bank_ipid { enum amdgpu_mca_ip ip; uint16_t hwid; @@ -194,6 +176,8 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(RmaDueToBadPageThreshold, PPSMC_MSG_RmaDueToBadPageThreshold, 0), MSG_MAP(SetThrottlingPolicy, PPSMC_MSG_SetThrottlingPolicy, 0), MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0), + MSG_MAP(ResetVCN, PPSMC_MSG_ResetVCN, 0), + MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 0), }; // clang-format on @@ -299,8 +283,8 @@ static inline 
void smu_v13_0_6_cap_clear(struct smu_context *smu, dpm_context->caps &= ~BIT_ULL(cap); } -static inline bool smu_v13_0_6_cap_supported(struct smu_context *smu, - enum smu_v13_0_6_caps cap) +bool smu_v13_0_6_cap_supported(struct smu_context *smu, + enum smu_v13_0_6_caps cap) { struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; @@ -353,6 +337,9 @@ static void smu_v13_0_12_init_caps(struct smu_context *smu) if (fw_ver >= 0x00561E00) smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS)); + + if (fw_ver >= 0x00562500) + smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); } static void smu_v13_0_6_init_caps(struct smu_context *smu) @@ -402,6 +389,13 @@ static void smu_v13_0_6_init_caps(struct smu_context *smu) smu_v13_0_6_cap_clear(smu, SMU_CAP(RMA_MSG)); if (fw_ver < 0x00555600) smu_v13_0_6_cap_clear(smu, SMU_CAP(ACA_SYND)); + if ((pgm == 7 && fw_ver >= 0x7550E00) || + (pgm == 0 && fw_ver >= 0x00557E00)) + smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); + if (fw_ver >= 0x00557F01) { + smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS)); + smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE)); + } } if (((pgm == 7) && (fw_ver >= 0x7550700)) || ((pgm == 0) && (fw_ver >= 0x00557900)) || @@ -525,7 +519,7 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) return -ENOMEM; smu_table->metrics_time = 0; - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_7); + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_8); smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); if (!smu_table->gpu_metrics_table) { @@ -747,9 +741,43 @@ static ssize_t smu_v13_0_6_get_pm_metrics(struct smu_context *smu, return pm_metrics->common_header.structure_size; } +static void smu_v13_0_6_fill_static_metrics_table(struct smu_context *smu, + StaticMetricsTable_t *static_metrics) +{ + struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; + + if (!static_metrics->InputTelemetryVoltageInmV) { + dev_warn(smu->adev->dev, "Invalid board voltage %d\n", + static_metrics->InputTelemetryVoltageInmV); + } + + dpm_context->board_volt = static_metrics->InputTelemetryVoltageInmV; +} + +int smu_v13_0_6_get_static_metrics_table(struct smu_context *smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; + uint32_t table_size = smu_table->tables[SMU_TABLE_SMU_METRICS].size; + struct smu_table *table = &smu_table->driver_table; + int ret; + + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetStaticMetricsTable, NULL); + if (ret) { + dev_info(smu->adev->dev, + "Failed to export static metrics table!\n"); + return ret; + } + + amdgpu_asic_invalidate_hdp(smu->adev, NULL); + memcpy(smu_table->metrics_table, table->cpu_addr, table_size); + + return 0; +} + static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; + StaticMetricsTable_t *static_metrics = (StaticMetricsTable_t *)smu_table->metrics_table; MetricsTableV0_t *metrics_v0 = (MetricsTableV0_t *)smu_table->metrics_table; MetricsTableV1_t *metrics_v1 = (MetricsTableV1_t *)smu_table->metrics_table; MetricsTableV2_t *metrics_v2 = (MetricsTableV2_t *)smu_table->metrics_table; @@ -759,7 +787,8 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) int ret, i, retry = 100; uint32_t table_version; - if (smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) && + smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) return 
smu_v13_0_12_setup_driver_pptable(smu); /* Store one-time values in driver PPTable */ @@ -813,6 +842,12 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) GET_METRIC_FIELD(PublicSerialNumber_AID, version)[0]; pptable->Init = true; + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) { + ret = smu_v13_0_6_get_static_metrics_table(smu); + if (ret) + return ret; + smu_v13_0_6_fill_static_metrics_table(smu, static_metrics); + } } return 0; @@ -1142,7 +1177,8 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, if (ret) return ret; - if (smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) && + smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) return smu_v13_0_12_get_smu_metrics_data(smu, member, value); /* For clocks with multiple instances, only report the first one */ @@ -1616,6 +1652,7 @@ static int smu_v13_0_6_read_sensor(struct smu_context *smu, enum amd_pp_sensors sensor, void *data, uint32_t *size) { + struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; int ret = 0; if (amdgpu_ras_intr_triggered()) @@ -1660,6 +1697,15 @@ static int smu_v13_0_6_read_sensor(struct smu_context *smu, ret = smu_v13_0_get_gfx_vdd(smu, (uint32_t *)data); *size = 4; break; + case AMDGPU_PP_SENSOR_VDDBOARD: + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(BOARD_VOLTAGE))) { + *(uint32_t *)data = dpm_context->board_volt; + *size = 4; + break; + } else { + ret = -EOPNOTSUPP; + break; + } case AMDGPU_PP_SENSOR_GPU_AVG_POWER: default: ret = -EOPNOTSUPP; @@ -1927,7 +1973,7 @@ static int smu_v13_0_6_set_performance_level(struct smu_context *smu, return ret; pstate_table->uclk_pstate.curr.max = uclk_table->max; } - pstate_table->uclk_pstate.custom.max = 0; + smu_v13_0_reset_custom_level(smu); return 0; case AMD_DPM_FORCED_LEVEL_MANUAL: @@ -2140,7 +2186,7 @@ static int smu_v13_0_6_usr_edit_dpm_table(struct smu_context *smu, smu, SMU_UCLK, min_clk, max_clk, false); if (ret) return ret; - pstate_table->uclk_pstate.custom.max = 0; + smu_v13_0_reset_custom_level(smu); } break; case PP_OD_COMMIT_DPM_TABLE: @@ -2486,8 +2532,8 @@ static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu) static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table) { struct smu_table_context *smu_table = &smu->smu_table; - struct gpu_metrics_v1_7 *gpu_metrics = - (struct gpu_metrics_v1_7 *)smu_table->gpu_metrics_table; + struct gpu_metrics_v1_8 *gpu_metrics = + (struct gpu_metrics_v1_8 *)smu_table->gpu_metrics_table; int version = smu_v13_0_6_get_metrics_version(smu); int ret = 0, xcc_id, inst, i, j, k, idx; struct amdgpu_device *adev = smu->adev; @@ -2507,13 +2553,14 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table return ret; } - if (smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) && + smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) return smu_v13_0_12_get_gpu_metrics(smu, table); metrics_v1 = (MetricsTableV1_t *)metrics_v0; metrics_v2 = (MetricsTableV2_t *)metrics_v0; - smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 7); + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 8); gpu_metrics->temperature_hotspot = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, version)); @@ -2666,6 +2713,20 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->xcp_stats[i].gfx_busy_acc[idx] = 
SMUQ10_ROUND(GET_GPU_METRIC_FIELD(GfxBusyAcc, version)[inst]); + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(HST_LIMIT_METRICS))) { + gpu_metrics->xcp_stats[i].gfx_below_host_limit_ppt_acc[idx] = + SMUQ10_ROUND + (metrics_v0->GfxclkBelowHostLimitPptAcc[inst]); + gpu_metrics->xcp_stats[i].gfx_below_host_limit_thm_acc[idx] = + SMUQ10_ROUND + (metrics_v0->GfxclkBelowHostLimitThmAcc[inst]); + gpu_metrics->xcp_stats[i].gfx_low_utilization_acc[idx] = + SMUQ10_ROUND + (metrics_v0->GfxclkLowUtilizationAcc[inst]); + gpu_metrics->xcp_stats[i].gfx_below_host_limit_total_acc[idx] = + SMUQ10_ROUND + (metrics_v0->GfxclkBelowHostLimitTotalAcc[inst]); + } idx++; } } @@ -2844,6 +2905,15 @@ static int smu_v13_0_6_mode1_reset(struct smu_context *smu) return ret; } +static int smu_v13_0_6_link_reset(struct smu_context *smu) +{ + int ret = 0; + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, + SMU_RESET_MODE_4, NULL); + return ret; +} + static bool smu_v13_0_6_is_mode1_reset_supported(struct smu_context *smu) { return true; @@ -2854,6 +2924,17 @@ static bool smu_v13_0_6_is_mode2_reset_supported(struct smu_context *smu) return true; } +static inline bool smu_v13_0_6_is_link_reset_supported(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + int var = (adev->pdev->device & 0xF); + + if (var == 0x1) + return true; + + return false; +} + static int smu_v13_0_6_smu_send_hbm_bad_page_num(struct smu_context *smu, uint32_t size) { @@ -2924,6 +3005,19 @@ static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t inst_mask) return ret; } +static int smu_v13_0_6_reset_vcn(struct smu_context *smu, uint32_t inst_mask) +{ + int ret = 0; + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ResetVCN, inst_mask, NULL); + if (ret) + dev_err(smu->adev->dev, + "failed to send ResetVCN event with mask 0x%x\n", + inst_mask); + return ret; +} + + static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) { struct smu_context *smu = adev->powerplay.pp_handle; @@ -3587,8 +3681,10 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .get_thermal_temperature_range = smu_v13_0_6_get_thermal_temperature_range, .mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported, .mode2_reset_is_support = smu_v13_0_6_is_mode2_reset_supported, + .link_reset_is_support = smu_v13_0_6_is_link_reset_supported, .mode1_reset = smu_v13_0_6_mode1_reset, .mode2_reset = smu_v13_0_6_mode2_reset, + .link_reset = smu_v13_0_6_link_reset, .wait_for_event = smu_v13_0_wait_for_event, .i2c_init = smu_v13_0_6_i2c_control_init, .i2c_fini = smu_v13_0_6_i2c_control_fini, @@ -3596,6 +3692,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .send_rma_reason = smu_v13_0_6_send_rma_reason, .reset_sdma = smu_v13_0_6_reset_sdma, .reset_sdma_is_supported = smu_v13_0_6_reset_sdma_is_supported, + .dpm_reset_vcn = smu_v13_0_6_reset_vcn, }; void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h index 83745909e564..5313206ae4bb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h @@ -26,6 +26,7 @@ #define SMU_13_0_6_UMD_PSTATE_GFXCLK_LEVEL 0x2 #define SMU_13_0_6_UMD_PSTATE_SOCCLK_LEVEL 0x4 #define SMU_13_0_6_UMD_PSTATE_MCLK_LEVEL 0x2 +#define SMU_CAP(x) SMU_13_0_6_CAPS_##x typedef enum { /*0*/ METRICS_VERSION_V0 = 0, @@ -51,6 +52,26 @@ struct PPTable_t { bool Init; }; +enum smu_v13_0_6_caps { + 
SMU_CAP(DPM), + SMU_CAP(DPM_POLICY), + SMU_CAP(OTHER_END_METRICS), + SMU_CAP(SET_UCLK_MAX), + SMU_CAP(PCIE_METRICS), + SMU_CAP(MCA_DEBUG_MODE), + SMU_CAP(PER_INST_METRICS), + SMU_CAP(CTF_LIMIT), + SMU_CAP(RMA_MSG), + SMU_CAP(ACA_SYND), + SMU_CAP(SDMA_RESET), + SMU_CAP(STATIC_METRICS), + SMU_CAP(HST_LIMIT_METRICS), + SMU_CAP(BOARD_VOLTAGE), + SMU_CAP(ALL), +}; + extern void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu); +bool smu_v13_0_6_cap_supported(struct smu_context *smu, enum smu_v13_0_6_caps cap); +int smu_v13_0_6_get_static_metrics_table(struct smu_context *smu); #endif diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index 57828f2b7b5a..56c7e3318f01 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -4393,8 +4393,9 @@ EXPORT_SYMBOL(drm_panel_dp_aux_backlight); #endif /* See DP Standard v2.1 2.6.4.4.1.1, 2.8.4.4, 2.8.7 */ -static int drm_dp_link_symbol_cycles(int lane_count, int pixels, int bpp_x16, - int symbol_size, bool is_mst) +static int drm_dp_link_data_symbol_cycles(int lane_count, int pixels, + int bpp_x16, int symbol_size, + bool is_mst) { int cycles = DIV_ROUND_UP(pixels * bpp_x16, 16 * symbol_size * lane_count); int align = is_mst ? 4 / lane_count : 1; @@ -4402,22 +4403,42 @@ static int drm_dp_link_symbol_cycles(int lane_count, int pixels, int bpp_x16, return ALIGN(cycles, align); } -static int drm_dp_link_dsc_symbol_cycles(int lane_count, int pixels, int slice_count, - int bpp_x16, int symbol_size, bool is_mst) +/** + * drm_dp_link_symbol_cycles - calculate the link symbol count with/without dsc + * @lane_count: DP link lane count + * @pixels: number of pixels in a scanline + * @dsc_slice_count: number of slices for DSC or '0' for non-DSC + * @bpp_x16: bits per pixel in .4 binary fixed format + * @symbol_size: DP symbol size + * @is_mst: %true for MST and %false for SST + * + * Calculate the link symbol cycles for both DSC (@dsc_slice_count !=0) and + * non-DSC case (@dsc_slice_count == 0) and return the count. + */ +int drm_dp_link_symbol_cycles(int lane_count, int pixels, int dsc_slice_count, + int bpp_x16, int symbol_size, bool is_mst) { + int slice_count = dsc_slice_count ? : 1; int slice_pixels = DIV_ROUND_UP(pixels, slice_count); - int slice_data_cycles = drm_dp_link_symbol_cycles(lane_count, slice_pixels, - bpp_x16, symbol_size, is_mst); - int slice_eoc_cycles = is_mst ? 4 / lane_count : 1; + int slice_data_cycles = drm_dp_link_data_symbol_cycles(lane_count, + slice_pixels, + bpp_x16, + symbol_size, + is_mst); + int slice_eoc_cycles = 0; + + if (dsc_slice_count) + slice_eoc_cycles = is_mst ? 4 / lane_count : 1; return slice_count * (slice_data_cycles + slice_eoc_cycles); } +EXPORT_SYMBOL(drm_dp_link_symbol_cycles); /** * drm_dp_bw_overhead - Calculate the BW overhead of a DP link stream * @lane_count: DP link lane count * @hactive: pixel count of the active period in one scanline of the stream - * @dsc_slice_count: DSC slice count if @flags/DRM_DP_LINK_BW_OVERHEAD_DSC is set + * @dsc_slice_count: number of slices for DSC or '0' for non-DSC * @bpp_x16: bits per pixel in .4 binary fixed point * @flags: DRM_DP_OVERHEAD_x flags * @@ -4431,7 +4452,7 @@ static int drm_dp_link_dsc_symbol_cycles(int lane_count, int pixels, int slice_c * as well as the stream's * - @hactive timing * - @bpp_x16 color depth - * - compression mode (@flags / %DRM_DP_OVERHEAD_DSC). 
+ * - compression mode (@dsc_slice_count != 0) * Note that this overhead doesn't account for the 8b/10b, 128b/132b * channel coding efficiency, for that see * @drm_dp_link_bw_channel_coding_efficiency(). @@ -4486,15 +4507,10 @@ int drm_dp_bw_overhead(int lane_count, int hactive, WARN_ON((flags & DRM_DP_BW_OVERHEAD_UHBR) && (flags & DRM_DP_BW_OVERHEAD_FEC)); - if (flags & DRM_DP_BW_OVERHEAD_DSC) - symbol_cycles = drm_dp_link_dsc_symbol_cycles(lane_count, hactive, - dsc_slice_count, - bpp_x16, symbol_size, - is_mst); - else - symbol_cycles = drm_dp_link_symbol_cycles(lane_count, hactive, - bpp_x16, symbol_size, - is_mst); + symbol_cycles = drm_dp_link_symbol_cycles(lane_count, hactive, + dsc_slice_count, + bpp_x16, symbol_size, + is_mst); return DIV_ROUND_UP_ULL(mul_u32_u32(symbol_cycles * symbol_size * lane_count, overhead * 16), diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index cf2463090d3a..246cf845e2c9 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -993,6 +993,40 @@ void drm_show_fdinfo(struct seq_file *m, struct file *f) EXPORT_SYMBOL(drm_show_fdinfo); /** + * drm_file_err - log process name, pid and client_name associated with a drm_file + * @file_priv: context of interest for process name and pid + * @fmt: printf() like format string + * + * Helper function for clients which needs to log process details such + * as name and pid etc along with user logs. + */ +void drm_file_err(struct drm_file *file_priv, const char *fmt, ...) +{ + va_list args; + struct va_format vaf; + struct pid *pid; + struct task_struct *task; + struct drm_device *dev = file_priv->minor->dev; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + mutex_lock(&file_priv->client_name_lock); + rcu_read_lock(); + pid = rcu_dereference(file_priv->pid); + task = pid_task(pid, PIDTYPE_TGID); + + drm_err(dev, "comm: %s pid: %d client: %s ... %pV", task ? task->comm : "Unset", + task ? task->pid : 0, file_priv->client_name ?: "Unset", &vaf); + + va_end(args); + rcu_read_unlock(); + mutex_unlock(&file_priv->client_name_lock); +} +EXPORT_SYMBOL(drm_file_err); + +/** * mock_drm_getfile - Create a new struct file for the drm device * @minor: drm minor to wrap (e.g. 
#drm_device.primary) * @flags: file creation mode (O_RDWR etc) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index c8fc271b33b7..13d4a16f7d33 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -52,7 +52,6 @@ i915-y += \ i915-y += \ soc/intel_dram.o \ soc/intel_gmch.o \ - soc/intel_pch.o \ soc/intel_rom.o # core library code @@ -282,6 +281,7 @@ i915-y += \ display/intel_modeset_setup.o \ display/intel_modeset_verify.o \ display/intel_overlay.o \ + display/intel_pch.o \ display/intel_pch_display.o \ display/intel_pch_refclk.o \ display/intel_plane_initial.o \ diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c index b39aae9165df..e0a98e6fd6d1 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.c +++ b/drivers/gpu/drm/i915/display/g4x_dp.c @@ -7,9 +7,11 @@ #include <linux/string_helpers.h> +#include <drm/drm_print.h> + #include "g4x_dp.h" -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_audio.h" #include "intel_backlight.h" #include "intel_connector.h" @@ -28,7 +30,6 @@ #include "intel_hotplug.h" #include "intel_pch_display.h" #include "intel_pps.h" -#include "vlv_sideband.h" static const struct dpll g4x_dpll[] = { { .dot = 162000, .p1 = 2, .p2 = 10, .n = 2, .m1 = 23, .m2 = 8, }, @@ -60,14 +61,13 @@ static void g4x_dp_set_clock(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); const struct dpll *divisor = NULL; int i, count = 0; if (display->platform.g4x) { divisor = g4x_dpll; count = ARRAY_SIZE(g4x_dpll); - } else if (HAS_PCH_SPLIT(dev_priv)) { + } else if (HAS_PCH_SPLIT(display)) { divisor = pch_dpll; count = ARRAY_SIZE(pch_dpll); } else if (display->platform.cherryview) { @@ -93,7 +93,6 @@ static void intel_dp_prepare(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); enum port port = encoder->port; struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); @@ -141,7 +140,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder, intel_dp->DP |= DP_ENHANCED_FRAMING; intel_dp->DP |= DP_PIPE_SEL_IVB(crtc->pipe); - } else if (HAS_PCH_CPT(dev_priv) && port != PORT_A) { + } else if (HAS_PCH_CPT(display) && port != PORT_A) { intel_dp->DP |= DP_LINK_TRAIN_OFF_CPT; intel_de_rmw(display, TRANS_DP_CTL(crtc->pipe), @@ -183,7 +182,7 @@ static void assert_dp_port(struct intel_dp *intel_dp, bool state) static void assert_edp_pll(struct intel_display *display, bool state) { - bool cur_state = intel_de_read(display, DP_A) & DP_PLL_ENABLE; + bool cur_state = intel_de_read(display, DP_A) & EDP_PLL_ENABLE; INTEL_DISPLAY_STATE_WARN(display, cur_state != state, "eDP PLL state assertion failure (expected %s, current %s)\n", @@ -205,12 +204,12 @@ static void ilk_edp_pll_on(struct intel_dp *intel_dp, drm_dbg_kms(display->drm, "enabling eDP PLL for clock %d\n", pipe_config->port_clock); - intel_dp->DP &= ~DP_PLL_FREQ_MASK; + intel_dp->DP &= ~EDP_PLL_FREQ_MASK; if (pipe_config->port_clock == 162000) - intel_dp->DP |= DP_PLL_FREQ_162MHZ; + intel_dp->DP |= EDP_PLL_FREQ_162MHZ; else - intel_dp->DP |= DP_PLL_FREQ_270MHZ; + intel_dp->DP |= EDP_PLL_FREQ_270MHZ; intel_de_write(display, DP_A, intel_dp->DP); 
intel_de_posting_read(display, DP_A); @@ -225,7 +224,7 @@ static void ilk_edp_pll_on(struct intel_dp *intel_dp, if (display->platform.ironlake) intel_wait_for_vblank_if_active(display, !crtc->pipe); - intel_dp->DP |= DP_PLL_ENABLE; + intel_dp->DP |= EDP_PLL_ENABLE; intel_de_write(display, DP_A, intel_dp->DP); intel_de_posting_read(display, DP_A); @@ -243,7 +242,7 @@ static void ilk_edp_pll_off(struct intel_dp *intel_dp, drm_dbg_kms(display->drm, "disabling eDP PLL\n"); - intel_dp->DP &= ~DP_PLL_ENABLE; + intel_dp->DP &= ~EDP_PLL_ENABLE; intel_de_write(display, DP_A, intel_dp->DP); intel_de_posting_read(display, DP_A); @@ -277,7 +276,6 @@ bool g4x_dp_port_enabled(struct intel_display *display, i915_reg_t dp_reg, enum port port, enum pipe *pipe) { - struct drm_i915_private *dev_priv = to_i915(display->drm); bool ret; u32 val; @@ -287,13 +285,13 @@ bool g4x_dp_port_enabled(struct intel_display *display, /* asserts want to know the pipe even if the port is disabled */ if (display->platform.ivybridge && port == PORT_A) - *pipe = (val & DP_PIPE_SEL_MASK_IVB) >> DP_PIPE_SEL_SHIFT_IVB; - else if (HAS_PCH_CPT(dev_priv) && port != PORT_A) + *pipe = REG_FIELD_GET(DP_PIPE_SEL_MASK_IVB, val); + else if (HAS_PCH_CPT(display) && port != PORT_A) ret &= cpt_dp_port_selected(display, port, pipe); else if (display->platform.cherryview) - *pipe = (val & DP_PIPE_SEL_MASK_CHV) >> DP_PIPE_SEL_SHIFT_CHV; + *pipe = REG_FIELD_GET(DP_PIPE_SEL_MASK_CHV, val); else - *pipe = (val & DP_PIPE_SEL_MASK) >> DP_PIPE_SEL_SHIFT; + *pipe = REG_FIELD_GET(DP_PIPE_SEL_MASK, val); return ret; } @@ -338,7 +336,6 @@ static void intel_dp_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); u32 tmp, flags = 0; enum port port = encoder->port; @@ -353,7 +350,7 @@ static void intel_dp_get_config(struct intel_encoder *encoder, pipe_config->has_audio = tmp & DP_AUDIO_OUTPUT_ENABLE && port != PORT_A; - if (HAS_PCH_CPT(dev_priv) && port != PORT_A) { + if (HAS_PCH_CPT(display) && port != PORT_A) { u32 trans_dp = intel_de_read(display, TRANS_DP_CTL(crtc->pipe)); @@ -389,13 +386,12 @@ static void intel_dp_get_config(struct intel_encoder *encoder, if (display->platform.g4x && tmp & DP_COLOR_RANGE_16_235) pipe_config->limited_color_range = true; - pipe_config->lane_count = - ((tmp & DP_PORT_WIDTH_MASK) >> DP_PORT_WIDTH_SHIFT) + 1; + pipe_config->lane_count = REG_FIELD_GET(DP_PORT_WIDTH_MASK, tmp) + 1; g4x_dp_get_m_n(pipe_config); if (port == PORT_A) { - if ((intel_de_read(display, DP_A) & DP_PLL_FREQ_MASK) == DP_PLL_FREQ_162MHZ) + if ((intel_de_read(display, DP_A) & EDP_PLL_FREQ_MASK) == EDP_PLL_FREQ_162MHZ) pipe_config->port_clock = 162000; else pipe_config->port_clock = 270000; @@ -416,7 +412,6 @@ intel_dp_link_down(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); enum port port = encoder->port; @@ -429,7 +424,7 @@ intel_dp_link_down(struct intel_encoder *encoder, drm_dbg_kms(display->drm, "\n"); if ((display->platform.ivybridge && port == PORT_A) || - (HAS_PCH_CPT(dev_priv) && port != PORT_A)) { + (HAS_PCH_CPT(display) && port != PORT_A)) { intel_dp->DP &= 
~DP_LINK_TRAIN_MASK_CPT; intel_dp->DP |= DP_LINK_TRAIN_PAT_IDLE_CPT; } else { @@ -448,7 +443,7 @@ intel_dp_link_down(struct intel_encoder *encoder, * to transcoder A after disabling it to allow the * matching HDMI port to be enabled on transcoder A. */ - if (HAS_PCH_IBX(dev_priv) && crtc->pipe == PIPE_B && port != PORT_A) { + if (HAS_PCH_IBX(display) && crtc->pipe == PIPE_B && port != PORT_A) { /* * We get CPU/PCH FIFO underruns on the other pipe when * doing the workaround. Sweep them under the rug. @@ -581,16 +576,10 @@ static void chv_post_disable_dp(struct intel_atomic_state *state, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - intel_dp_link_down(encoder, old_crtc_state); - vlv_dpio_get(dev_priv); - /* Assert data lane reset */ chv_data_lane_soft_reset(encoder, old_crtc_state, true); - - vlv_dpio_put(dev_priv); } static void @@ -1223,10 +1212,10 @@ static int g4x_dp_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); int ret; - if (HAS_PCH_SPLIT(i915) && encoder->port != PORT_A) + if (HAS_PCH_SPLIT(display) && encoder->port != PORT_A) crtc_state->has_pch_encoder = true; ret = intel_dp_compute_config(encoder, crtc_state, conn_state); @@ -1279,7 +1268,6 @@ static const struct drm_encoder_funcs intel_dp_enc_funcs = { bool g4x_dp_init(struct intel_display *display, i915_reg_t output_reg, enum port port) { - struct drm_i915_private *dev_priv = to_i915(display->drm); const struct intel_bios_encoder_data *devdata; struct intel_digital_port *dig_port; struct intel_encoder *intel_encoder; @@ -1353,7 +1341,7 @@ bool g4x_dp_init(struct intel_display *display, intel_encoder->audio_disable = g4x_dp_audio_disable; if ((display->platform.ivybridge && port == PORT_A) || - (HAS_PCH_CPT(dev_priv) && port != PORT_A)) + (HAS_PCH_CPT(display) && port != PORT_A)) dig_port->dp.set_link_train = cpt_set_link_train; else dig_port->dp.set_link_train = g4x_set_link_train; @@ -1370,7 +1358,7 @@ bool g4x_dp_init(struct intel_display *display, intel_encoder->set_signal_levels = g4x_set_signal_levels; if (display->platform.valleyview || display->platform.cherryview || - (HAS_PCH_SPLIT(dev_priv) && port != PORT_A)) { + (HAS_PCH_SPLIT(display) && port != PORT_A)) { dig_port->dp.preemph_max = intel_dp_preemph_max_3; dig_port->dp.voltage_max = intel_dp_voltage_max_3; } else { diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c index 3dc2c59a3df0..1d252432d729 100644 --- a/drivers/gpu/drm/i915/display/g4x_hdmi.c +++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c @@ -5,8 +5,9 @@ * HDMI support for G4x,ILK,SNB,IVB,VLV,CHV (HSW+ handled by the DDI code). 
*/ +#include <drm/drm_print.h> + #include "g4x_hdmi.h" -#include "i915_drv.h" #include "i915_reg.h" #include "intel_atomic.h" #include "intel_audio.h" @@ -22,13 +23,11 @@ #include "intel_hdmi.h" #include "intel_hotplug.h" #include "intel_sdvo.h" -#include "vlv_sideband.h" static void intel_hdmi_prepare(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; @@ -37,7 +36,7 @@ static void intel_hdmi_prepare(struct intel_encoder *encoder, intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); hdmi_val = SDVO_ENCODING_HDMI; - if (!HAS_PCH_SPLIT(dev_priv) && crtc_state->limited_color_range) + if (!HAS_PCH_SPLIT(display) && crtc_state->limited_color_range) hdmi_val |= HDMI_COLOR_RANGE_16_235; if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC) hdmi_val |= SDVO_VSYNC_ACTIVE_HIGH; @@ -52,7 +51,7 @@ static void intel_hdmi_prepare(struct intel_encoder *encoder, if (crtc_state->has_hdmi_sink) hdmi_val |= HDMI_MODE_SELECT_HDMI; - if (HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_CPT(display)) hdmi_val |= SDVO_PIPE_SEL_CPT(crtc->pipe); else if (display->platform.cherryview) hdmi_val |= SDVO_PIPE_SEL_CHV(crtc->pipe); @@ -134,9 +133,8 @@ static int g4x_hdmi_compute_config(struct intel_encoder *encoder, struct intel_display *display = to_intel_display(encoder); struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - if (HAS_PCH_SPLIT(i915)) { + if (HAS_PCH_SPLIT(display)) { crtc_state->has_pch_encoder = true; if (!intel_fdi_compute_pipe_bpp(crtc_state)) return -EINVAL; @@ -155,7 +153,6 @@ static void intel_hdmi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); u32 tmp, flags = 0; int dotclock; @@ -186,7 +183,7 @@ static void intel_hdmi_get_config(struct intel_encoder *encoder, if (tmp & HDMI_AUDIO_ENABLE) pipe_config->has_audio = true; - if (!HAS_PCH_SPLIT(dev_priv) && + if (!HAS_PCH_SPLIT(display) && tmp & HDMI_COLOR_RANGE_16_235) pipe_config->limited_color_range = true; @@ -383,7 +380,6 @@ static void intel_disable_hdmi(struct intel_atomic_state *state, const struct drm_connector_state *old_conn_state) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); struct intel_digital_port *dig_port = hdmi_to_dig_port(intel_hdmi); @@ -401,7 +397,7 @@ static void intel_disable_hdmi(struct intel_atomic_state *state, * to transcoder A after disabling it to allow the * matching DP port to be enabled on transcoder A. */ - if (HAS_PCH_IBX(dev_priv) && crtc->pipe == PIPE_B) { + if (HAS_PCH_IBX(display) && crtc->pipe == PIPE_B) { /* * We get CPU/PCH FIFO underruns on the other pipe when * doing the workaround. Sweep them under the rug. 
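[Editor's note, not part of the patch: the g4x_dp.c and g4x_hdmi.c hunks above repeatedly replace dev_priv-based PCH checks (HAS_PCH_SPLIT/CPT/IBX(dev_priv) via to_i915()) with checks on struct intel_display, which is what lets these files drop the i915_drv.h include. The following is a minimal user-space sketch of that pattern; the struct layout, pch_type field, and helper names are illustrative only, not the driver's real definitions.]

	#include <stdbool.h>
	#include <stdio.h>

	enum pch_type { PCH_NONE, PCH_IBX, PCH_CPT, PCH_LPT };

	struct intel_display {
		enum pch_type pch_type;	/* display-scoped copy of the PCH id */
	};

	/* display-based helpers, mirroring HAS_PCH_CPT(display)/HAS_PCH_IBX(display) */
	static bool has_pch_cpt(const struct intel_display *display)
	{
		return display->pch_type == PCH_CPT;
	}

	static bool has_pch_ibx(const struct intel_display *display)
	{
		return display->pch_type == PCH_IBX;
	}

	int main(void)
	{
		struct intel_display display = { .pch_type = PCH_CPT };

		/* encoder setup can pick the CPT vs. IBX path from display state alone */
		printf("CPT: %d, IBX: %d\n",
		       has_pch_cpt(&display), has_pch_ibx(&display));
		return 0;
	}

The design point mirrored by the hunks: once the check only needs display-level state, callers no longer have to reach back into struct drm_i915_private, so the display code can be compiled against the lighter headers (drm_print.h, i915_utils.h) added at the top of each file.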
@@ -539,15 +535,8 @@ static void chv_hdmi_post_disable(struct intel_atomic_state *state, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(display->drm); - - vlv_dpio_get(dev_priv); - /* Assert data lane reset */ chv_data_lane_soft_reset(encoder, old_crtc_state, true); - - vlv_dpio_put(dev_priv); } static void chv_hdmi_pre_enable(struct intel_atomic_state *state, @@ -682,7 +671,6 @@ static bool assert_hdmi_port_valid(struct intel_display *display, enum port port bool g4x_hdmi_init(struct intel_display *display, i915_reg_t hdmi_reg, enum port port) { - struct drm_i915_private *dev_priv = to_i915(display->drm); const struct intel_bios_encoder_data *devdata; struct intel_digital_port *dig_port; struct intel_encoder *intel_encoder; @@ -724,7 +712,7 @@ bool g4x_hdmi_init(struct intel_display *display, intel_encoder->hotplug = intel_hdmi_hotplug; intel_encoder->compute_config = g4x_hdmi_compute_config; - if (HAS_PCH_SPLIT(dev_priv)) { + if (HAS_PCH_SPLIT(display)) { intel_encoder->disable = pch_disable_hdmi; intel_encoder->post_disable = pch_post_disable_hdmi; } else { @@ -745,9 +733,9 @@ bool g4x_hdmi_init(struct intel_display *display, intel_encoder->post_disable = vlv_hdmi_post_disable; } else { intel_encoder->pre_enable = intel_hdmi_pre_enable; - if (HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_CPT(display)) intel_encoder->enable = cpt_enable_hdmi; - else if (HAS_PCH_IBX(dev_priv)) + else if (HAS_PCH_IBX(display)) intel_encoder->enable = ibx_enable_hdmi; else intel_encoder->enable = g4x_enable_hdmi; diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.c b/drivers/gpu/drm/i915/display/i9xx_plane.c index 75780fefe0ce..a2a6d52be0a5 100644 --- a/drivers/gpu/drm/i915/display/i9xx_plane.c +++ b/drivers/gpu/drm/i915/display/i9xx_plane.c @@ -7,9 +7,10 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_blend.h> #include <drm/drm_fourcc.h> +#include <drm/drm_print.h> -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "i9xx_plane.h" #include "i9xx_plane_regs.h" #include "intel_atomic.h" @@ -631,92 +632,84 @@ static void bdw_primary_enable_flip_done(struct intel_plane *plane) { struct intel_display *display = to_intel_display(plane); - struct drm_i915_private *i915 = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; - spin_lock_irq(&i915->irq_lock); + spin_lock_irq(&display->irq.lock); bdw_enable_pipe_irq(display, pipe, GEN8_PIPE_PRIMARY_FLIP_DONE); - spin_unlock_irq(&i915->irq_lock); + spin_unlock_irq(&display->irq.lock); } static void bdw_primary_disable_flip_done(struct intel_plane *plane) { struct intel_display *display = to_intel_display(plane); - struct drm_i915_private *i915 = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; - spin_lock_irq(&i915->irq_lock); + spin_lock_irq(&display->irq.lock); bdw_disable_pipe_irq(display, pipe, GEN8_PIPE_PRIMARY_FLIP_DONE); - spin_unlock_irq(&i915->irq_lock); + spin_unlock_irq(&display->irq.lock); } static void ivb_primary_enable_flip_done(struct intel_plane *plane) { struct intel_display *display = to_intel_display(plane); - struct drm_i915_private *i915 = to_i915(plane->base.dev); - spin_lock_irq(&i915->irq_lock); + spin_lock_irq(&display->irq.lock); ilk_enable_display_irq(display, DE_PLANE_FLIP_DONE_IVB(plane->i9xx_plane)); - spin_unlock_irq(&i915->irq_lock); + spin_unlock_irq(&display->irq.lock); } static void ivb_primary_disable_flip_done(struct intel_plane *plane) 
{ struct intel_display *display = to_intel_display(plane); - struct drm_i915_private *i915 = to_i915(plane->base.dev); - spin_lock_irq(&i915->irq_lock); + spin_lock_irq(&display->irq.lock); ilk_disable_display_irq(display, DE_PLANE_FLIP_DONE_IVB(plane->i9xx_plane)); - spin_unlock_irq(&i915->irq_lock); + spin_unlock_irq(&display->irq.lock); } static void ilk_primary_enable_flip_done(struct intel_plane *plane) { struct intel_display *display = to_intel_display(plane); - struct drm_i915_private *i915 = to_i915(plane->base.dev); - spin_lock_irq(&i915->irq_lock); + spin_lock_irq(&display->irq.lock); ilk_enable_display_irq(display, DE_PLANE_FLIP_DONE(plane->i9xx_plane)); - spin_unlock_irq(&i915->irq_lock); + spin_unlock_irq(&display->irq.lock); } static void ilk_primary_disable_flip_done(struct intel_plane *plane) { struct intel_display *display = to_intel_display(plane); - struct drm_i915_private *i915 = to_i915(plane->base.dev); - spin_lock_irq(&i915->irq_lock); + spin_lock_irq(&display->irq.lock); ilk_disable_display_irq(display, DE_PLANE_FLIP_DONE(plane->i9xx_plane)); - spin_unlock_irq(&i915->irq_lock); + spin_unlock_irq(&display->irq.lock); } static void vlv_primary_enable_flip_done(struct intel_plane *plane) { struct intel_display *display = to_intel_display(plane); - struct drm_i915_private *i915 = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; - spin_lock_irq(&i915->irq_lock); + spin_lock_irq(&display->irq.lock); i915_enable_pipestat(display, pipe, PLANE_FLIP_DONE_INT_STATUS_VLV); - spin_unlock_irq(&i915->irq_lock); + spin_unlock_irq(&display->irq.lock); } static void vlv_primary_disable_flip_done(struct intel_plane *plane) { struct intel_display *display = to_intel_display(plane); - struct drm_i915_private *i915 = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; - spin_lock_irq(&i915->irq_lock); + spin_lock_irq(&display->irq.lock); i915_disable_pipestat(display, pipe, PLANE_FLIP_DONE_INT_STATUS_VLV); - spin_unlock_irq(&i915->irq_lock); + spin_unlock_irq(&display->irq.lock); } static bool i9xx_plane_can_async_flip(u64 modifier) diff --git a/drivers/gpu/drm/i915/display/i9xx_wm.c b/drivers/gpu/drm/i915/display/i9xx_wm.c index 40751f1547b7..77876ef735b7 100644 --- a/drivers/gpu/drm/i915/display/i9xx_wm.c +++ b/drivers/gpu/drm/i915/display/i9xx_wm.c @@ -4143,10 +4143,8 @@ static const struct intel_wm_funcs nop_funcs = { void i9xx_wm_init(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - /* For FIFO watermark updates */ - if (HAS_PCH_SPLIT(dev_priv)) { + if (HAS_PCH_SPLIT(display)) { ilk_setup_wm_latency(display); display->funcs.wm = &ilk_wm_funcs; } else if (display->platform.valleyview || display->platform.cherryview) { diff --git a/drivers/gpu/drm/i915/display/intel_alpm.c b/drivers/gpu/drm/i915/display/intel_alpm.c index 55f3ae1e68c9..1bf08b80c23f 100644 --- a/drivers/gpu/drm/i915/display/intel_alpm.c +++ b/drivers/gpu/drm/i915/display/intel_alpm.c @@ -5,12 +5,15 @@ #include <linux/debugfs.h> +#include <drm/drm_print.h> + #include "intel_alpm.h" #include "intel_crtc.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_dp.h" #include "intel_dp_aux.h" +#include "intel_psr.h" #include "intel_psr_regs.h" bool intel_alpm_aux_wake_supported(struct intel_dp *intel_dp) @@ -23,7 +26,7 @@ bool intel_alpm_aux_less_wake_supported(struct intel_dp *intel_dp) return intel_dp->alpm_dpcd & DP_ALPM_AUX_LESS_CAP; } -void intel_alpm_init_dpcd(struct intel_dp *intel_dp) +void intel_alpm_init(struct intel_dp *intel_dp) { u8 
dpcd; @@ -31,6 +34,7 @@ void intel_alpm_init_dpcd(struct intel_dp *intel_dp) return; intel_dp->alpm_dpcd = dpcd; + mutex_init(&intel_dp->alpm_parameters.lock); } /* @@ -276,6 +280,14 @@ void intel_alpm_lobf_compute_config(struct intel_dp *intel_dp, int waketime_in_lines, first_sdp_position; int context_latency, guardband; + if (intel_dp->alpm_parameters.lobf_disable_debug) { + drm_dbg_kms(display->drm, "LOBF is disabled by debug flag\n"); + return; + } + + if (intel_dp->alpm_parameters.sink_alpm_error) + return; + if (!intel_dp_is_edp(intel_dp)) return; @@ -288,6 +300,10 @@ void intel_alpm_lobf_compute_config(struct intel_dp *intel_dp, if (crtc_state->has_psr) return; + if (crtc_state->vrr.vmin != crtc_state->vrr.vmax || + crtc_state->vrr.vmin != crtc_state->vrr.flipline) + return; + if (!(intel_alpm_aux_wake_supported(intel_dp) || intel_alpm_aux_less_wake_supported(intel_dp))) return; @@ -316,15 +332,16 @@ static void lnl_alpm_configure(struct intel_dp *intel_dp, enum port port = dp_to_dig_port(intel_dp)->base.port; u32 alpm_ctl; - if (DISPLAY_VER(display) < 20 || - (!intel_dp->psr.sel_update_enabled && !intel_dp_is_edp(intel_dp))) + if (DISPLAY_VER(display) < 20 || (!intel_psr_needs_alpm(intel_dp, crtc_state) && + !crtc_state->has_lobf)) return; + mutex_lock(&intel_dp->alpm_parameters.lock); /* * Panel Replay on eDP is always using ALPM aux less. I.e. no need to * check panel support at this point. */ - if ((intel_dp->psr.panel_replay_enabled && intel_dp_is_edp(intel_dp)) || + if ((crtc_state->has_panel_replay && intel_dp_is_edp(intel_dp)) || (crtc_state->has_lobf && intel_alpm_aux_less_wake_supported(intel_dp))) { alpm_ctl = ALPM_CTL_ALPM_ENABLE | ALPM_CTL_ALPM_AUX_LESS_ENABLE | @@ -353,18 +370,107 @@ static void lnl_alpm_configure(struct intel_dp *intel_dp, ALPM_CTL_EXTENDED_FAST_WAKE_TIME(intel_dp->alpm_parameters.fast_wake_lines); } - if (crtc_state->has_lobf) + if (crtc_state->has_lobf) { alpm_ctl |= ALPM_CTL_LOBF_ENABLE; + drm_dbg_kms(display->drm, "Link off between frames (LOBF) enabled\n"); + } alpm_ctl |= ALPM_CTL_ALPM_ENTRY_CHECK(intel_dp->alpm_parameters.check_entry_lines); intel_de_write(display, ALPM_CTL(display, cpu_transcoder), alpm_ctl); + mutex_unlock(&intel_dp->alpm_parameters.lock); } void intel_alpm_configure(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { lnl_alpm_configure(intel_dp, crtc_state); + intel_dp->alpm_parameters.transcoder = crtc_state->cpu_transcoder; +} + +void intel_alpm_pre_plane_update(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + struct intel_display *display = to_intel_display(state); + const struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + struct intel_encoder *encoder; + + if (DISPLAY_VER(display) < 20) + return; + + if (crtc_state->has_lobf || crtc_state->has_lobf == old_crtc_state->has_lobf) + return; + + for_each_intel_encoder_mask(display->drm, encoder, + crtc_state->uapi.encoder_mask) { + struct intel_dp *intel_dp; + + if (!intel_encoder_is_dp(encoder)) + continue; + + intel_dp = enc_to_intel_dp(encoder); + + if (!intel_dp_is_edp(intel_dp)) + continue; + + if (old_crtc_state->has_lobf) { + mutex_lock(&intel_dp->alpm_parameters.lock); + intel_de_write(display, ALPM_CTL(display, cpu_transcoder), 0); + drm_dbg_kms(display->drm, "Link off between frames (LOBF) disabled\n"); + 
mutex_unlock(&intel_dp->alpm_parameters.lock); + } + } +} + +static void intel_alpm_enable_sink(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) +{ + u8 val; + + if (!intel_psr_needs_alpm(intel_dp, crtc_state) && !crtc_state->has_lobf) + return; + + val = DP_ALPM_ENABLE | DP_ALPM_LOCK_ERROR_IRQ_HPD_ENABLE; + + if (crtc_state->has_panel_replay || (crtc_state->has_lobf && + intel_alpm_aux_less_wake_supported(intel_dp))) + val |= DP_ALPM_MODE_AUX_LESS; + + drm_dp_dpcd_writeb(&intel_dp->aux, DP_RECEIVER_ALPM_CONFIG, val); +} + +void intel_alpm_post_plane_update(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + struct intel_display *display = to_intel_display(state); + const struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + struct intel_encoder *encoder; + + if ((!crtc_state->has_lobf || + crtc_state->has_lobf == old_crtc_state->has_lobf) && !crtc_state->has_psr) + return; + + for_each_intel_encoder_mask(display->drm, encoder, + crtc_state->uapi.encoder_mask) { + struct intel_dp *intel_dp; + + if (!intel_encoder_is_dp(encoder)) + continue; + + intel_dp = enc_to_intel_dp(encoder); + + if (intel_dp_is_edp(intel_dp)) { + intel_alpm_enable_sink(intel_dp, crtc_state); + intel_alpm_configure(intel_dp, crtc_state); + } + } } static int i915_edp_lobf_info_show(struct seq_file *m, void *data) @@ -403,6 +509,32 @@ out: DEFINE_SHOW_ATTRIBUTE(i915_edp_lobf_info); +static int +i915_edp_lobf_debug_get(void *data, u64 *val) +{ + struct intel_connector *connector = data; + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); + + *val = intel_dp->alpm_parameters.lobf_disable_debug; + + return 0; +} + +static int +i915_edp_lobf_debug_set(void *data, u64 val) +{ + struct intel_connector *connector = data; + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); + + intel_dp->alpm_parameters.lobf_disable_debug = val; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_edp_lobf_debug_fops, + i915_edp_lobf_debug_get, i915_edp_lobf_debug_set, + "%llu\n"); + void intel_alpm_lobf_debugfs_add(struct intel_connector *connector) { struct intel_display *display = to_intel_display(connector); @@ -412,6 +544,55 @@ void intel_alpm_lobf_debugfs_add(struct intel_connector *connector) connector->base.connector_type != DRM_MODE_CONNECTOR_eDP) return; + debugfs_create_file("i915_edp_lobf_debug", 0644, root, + connector, &i915_edp_lobf_debug_fops); + debugfs_create_file("i915_edp_lobf_info", 0444, root, connector, &i915_edp_lobf_info_fops); } + +void intel_alpm_disable(struct intel_dp *intel_dp) +{ + struct intel_display *display = to_intel_display(intel_dp); + enum transcoder cpu_transcoder = intel_dp->alpm_parameters.transcoder; + + if (DISPLAY_VER(display) < 20 || !intel_dp->alpm_dpcd) + return; + + mutex_lock(&intel_dp->alpm_parameters.lock); + + intel_de_rmw(display, ALPM_CTL(display, cpu_transcoder), + ALPM_CTL_ALPM_ENABLE | ALPM_CTL_LOBF_ENABLE | + ALPM_CTL_ALPM_AUX_LESS_ENABLE, 0); + + intel_de_rmw(display, + PORT_ALPM_CTL(cpu_transcoder), + PORT_ALPM_CTL_ALPM_AUX_LESS_ENABLE, 0); + + drm_dbg_kms(display->drm, "Disabling ALPM\n"); + mutex_unlock(&intel_dp->alpm_parameters.lock); +} + +bool intel_alpm_get_error(struct intel_dp *intel_dp) +{ + struct intel_display *display = to_intel_display(intel_dp); + struct drm_dp_aux *aux = &intel_dp->aux; + u8 val; + int r; + + r = drm_dp_dpcd_readb(aux, DP_RECEIVER_ALPM_STATUS, &val); + if (r != 
1) { + drm_err(display->drm, "Error reading ALPM status\n"); + return true; + } + + if (val & DP_ALPM_LOCK_TIMEOUT_ERROR) { + drm_dbg_kms(display->drm, "ALPM lock timeout error\n"); + + /* Clearing error */ + drm_dp_dpcd_writeb(aux, DP_RECEIVER_ALPM_STATUS, val); + return true; + } + + return false; +} diff --git a/drivers/gpu/drm/i915/display/intel_alpm.h b/drivers/gpu/drm/i915/display/intel_alpm.h index 8c409b10dce6..d7126d65b60f 100644 --- a/drivers/gpu/drm/i915/display/intel_alpm.h +++ b/drivers/gpu/drm/i915/display/intel_alpm.h @@ -12,8 +12,10 @@ struct intel_dp; struct intel_crtc_state; struct drm_connector_state; struct intel_connector; +struct intel_atomic_state; +struct intel_crtc; -void intel_alpm_init_dpcd(struct intel_dp *intel_dp); +void intel_alpm_init(struct intel_dp *intel_dp); bool intel_alpm_compute_params(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); void intel_alpm_lobf_compute_config(struct intel_dp *intel_dp, @@ -21,7 +23,13 @@ void intel_alpm_lobf_compute_config(struct intel_dp *intel_dp, struct drm_connector_state *conn_state); void intel_alpm_configure(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); +void intel_alpm_pre_plane_update(struct intel_atomic_state *state, + struct intel_crtc *crtc); +void intel_alpm_post_plane_update(struct intel_atomic_state *state, + struct intel_crtc *crtc); void intel_alpm_lobf_debugfs_add(struct intel_connector *connector); bool intel_alpm_aux_wake_supported(struct intel_dp *intel_dp); bool intel_alpm_aux_less_wake_supported(struct intel_dp *intel_dp); +void intel_alpm_disable(struct intel_dp *intel_dp); +bool intel_alpm_get_error(struct intel_dp *intel_dp); #endif diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index ea935a5d94c8..40d8bbd8107d 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -27,9 +27,9 @@ #include <drm/drm_edid.h> #include <drm/drm_eld.h> #include <drm/drm_fixed.h> +#include <drm/drm_print.h> #include <drm/intel/i915_component.h> -#include "i915_drv.h" #include "intel_atomic.h" #include "intel_audio.h" #include "intel_audio_regs.h" @@ -587,19 +587,17 @@ static void ibx_audio_regs_init(struct intel_display *display, enum pipe pipe, struct ibx_audio_regs *regs) { - struct drm_i915_private *i915 = to_i915(display->drm); - if (display->platform.valleyview || display->platform.cherryview) { regs->hdmiw_hdmiedid = VLV_HDMIW_HDMIEDID(pipe); regs->aud_config = VLV_AUD_CFG(pipe); regs->aud_cntl_st = VLV_AUD_CNTL_ST(pipe); regs->aud_cntrl_st2 = VLV_AUD_CNTL_ST2; - } else if (HAS_PCH_CPT(i915)) { + } else if (HAS_PCH_CPT(display)) { regs->hdmiw_hdmiedid = CPT_HDMIW_HDMIEDID(pipe); regs->aud_config = CPT_AUD_CFG(pipe); regs->aud_cntl_st = CPT_AUD_CNTL_ST(pipe); regs->aud_cntrl_st2 = CPT_AUD_CNTRL_ST2; - } else if (HAS_PCH_IBX(i915)) { + } else if (HAS_PCH_IBX(display)) { regs->hdmiw_hdmiedid = IBX_HDMIW_HDMIEDID(pipe); regs->aud_config = IBX_AUD_CFG(pipe); regs->aud_cntl_st = IBX_AUD_CNTL_ST(pipe); @@ -889,12 +887,10 @@ static const struct intel_audio_funcs hsw_audio_funcs = { */ void intel_audio_hooks_init(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - if (display->platform.g4x) display->funcs.audio = &g4x_audio_funcs; else if (display->platform.valleyview || display->platform.cherryview || - HAS_PCH_CPT(i915) || HAS_PCH_IBX(i915)) + HAS_PCH_CPT(display) || HAS_PCH_IBX(display)) display->funcs.audio = &ibx_audio_funcs; else 
if (display->platform.haswell || DISPLAY_VER(display) >= 8) display->funcs.audio = &hsw_audio_funcs; diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 4f3fa966c537..5827da586003 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -10,8 +10,11 @@ #include <acpi/video.h> -#include "i915_drv.h" +#include <drm/drm_file.h> +#include <drm/drm_print.h> + #include "i915_reg.h" +#include "i915_utils.h" #include "intel_backlight.h" #include "intel_backlight_regs.h" #include "intel_connector.h" @@ -473,7 +476,6 @@ static void lpt_enable_backlight(const struct intel_crtc_state *crtc_state, { struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_display *display = to_intel_display(connector); - struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; u32 pch_ctl1, pch_ctl2; @@ -486,7 +488,7 @@ static void lpt_enable_backlight(const struct intel_crtc_state *crtc_state, intel_de_write(display, BLC_PWM_PCH_CTL1, pch_ctl1); } - if (HAS_PCH_LPT(i915)) + if (HAS_PCH_LPT(display)) intel_de_rmw(display, SOUTH_CHICKEN2, LPT_PWM_GRANULARITY, panel->backlight.alternate_pwm_increment ? LPT_PWM_GRANULARITY : 0); @@ -503,7 +505,7 @@ static void lpt_enable_backlight(const struct intel_crtc_state *crtc_state, pch_ctl1 |= BLM_PCH_POLARITY; /* After LPT, override is the default. */ - if (HAS_PCH_LPT(i915)) + if (HAS_PCH_LPT(display)) pch_ctl1 |= BLM_PCH_OVERRIDE_ENABLE; intel_de_write(display, BLC_PWM_PCH_CTL1, pch_ctl1); @@ -1064,7 +1066,7 @@ static u32 spt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) */ static u32 lpt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); struct intel_panel *panel = &connector->panel; u32 mul, clock; @@ -1073,7 +1075,7 @@ static u32 lpt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) else mul = 128; - if (HAS_PCH_LPT_H(i915)) + if (HAS_PCH_LPT_H(display)) clock = MHz(135); /* LPT:H */ else clock = MHz(24); /* LPT:LP */ @@ -1230,12 +1232,11 @@ static u32 get_backlight_min_vbt(struct intel_connector *connector) static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unused) { struct intel_display *display = to_intel_display(connector); - struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; u32 cpu_ctl2, pch_ctl1, pch_ctl2, val; bool alt, cpu_mode; - if (HAS_PCH_LPT(i915)) + if (HAS_PCH_LPT(display)) alt = intel_de_read(display, SOUTH_CHICKEN2) & LPT_PWM_GRANULARITY; else alt = intel_de_read(display, SOUTH_CHICKEN1) & SPT_PWM_GRANULARITY; @@ -1259,7 +1260,7 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus panel->backlight.pwm_enabled = pch_ctl1 & BLM_PCH_PWM_ENABLE; - cpu_mode = panel->backlight.pwm_enabled && HAS_PCH_LPT(i915) && + cpu_mode = panel->backlight.pwm_enabled && HAS_PCH_LPT(display) && !(pch_ctl1 & BLM_PCH_OVERRIDE_ENABLE) && (cpu_ctl2 & BLM_PWM_ENABLE); @@ -1466,15 +1467,13 @@ bxt_setup_backlight(struct intel_connector *connector, enum pipe unused) static int cnp_num_backlight_controllers(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - - if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + if (INTEL_PCH_TYPE(display) >= PCH_MTL) return 2; - if 
(INTEL_PCH_TYPE(i915) >= PCH_DG1) + if (INTEL_PCH_TYPE(display) >= PCH_DG1) return 1; - if (INTEL_PCH_TYPE(i915) >= PCH_ICP) + if (INTEL_PCH_TYPE(display) >= PCH_ICP) return 2; return 1; @@ -1482,14 +1481,12 @@ static int cnp_num_backlight_controllers(struct intel_display *display) static bool cnp_backlight_controller_is_valid(struct intel_display *display, int controller) { - struct drm_i915_private *i915 = to_i915(display->drm); - if (controller < 0 || controller >= cnp_num_backlight_controllers(display)) return false; if (controller == 1 && - INTEL_PCH_TYPE(i915) >= PCH_ICP && - INTEL_PCH_TYPE(i915) <= PCH_ADP) + INTEL_PCH_TYPE(display) >= PCH_ICP && + INTEL_PCH_TYPE(display) <= PCH_ADP) return intel_de_read(display, SOUTH_CHICKEN1) & ICP_SECOND_PPS_IO_SELECT; return true; @@ -1818,7 +1815,6 @@ void intel_backlight_init_funcs(struct intel_panel *panel) struct intel_connector *connector = container_of(panel, struct intel_connector, panel); struct intel_display *display = to_intel_display(connector); - struct drm_i915_private *i915 = to_i915(connector->base.dev); if (connector->base.connector_type == DRM_MODE_CONNECTOR_DSI && intel_dsi_dcs_init_backlight_funcs(connector) == 0) @@ -1826,14 +1822,14 @@ void intel_backlight_init_funcs(struct intel_panel *panel) if (display->platform.geminilake || display->platform.broxton) { panel->backlight.pwm_funcs = &bxt_pwm_funcs; - } else if (INTEL_PCH_TYPE(i915) >= PCH_CNP) { + } else if (INTEL_PCH_TYPE(display) >= PCH_CNP) { panel->backlight.pwm_funcs = &cnp_pwm_funcs; - } else if (INTEL_PCH_TYPE(i915) >= PCH_LPT_H) { - if (HAS_PCH_LPT(i915)) + } else if (INTEL_PCH_TYPE(display) >= PCH_LPT_H) { + if (HAS_PCH_LPT(display)) panel->backlight.pwm_funcs = &lpt_pwm_funcs; else panel->backlight.pwm_funcs = &spt_pwm_funcs; - } else if (HAS_PCH_SPLIT(i915)) { + } else if (HAS_PCH_SPLIT(display)) { panel->backlight.pwm_funcs = &pch_pwm_funcs; } else if (display->platform.valleyview || display->platform.cherryview) { if (connector->base.connector_type == DRM_MODE_CONNECTOR_DSI) { diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index fabfcf2caa69..ba7b8938b17c 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -2245,28 +2245,27 @@ static const u8 adlp_ddc_pin_map[] = { static u8 map_ddc_pin(struct intel_display *display, u8 vbt_pin) { - struct drm_i915_private *i915 = to_i915(display->drm); const u8 *ddc_pin_map; int i, n_entries; - if (INTEL_PCH_TYPE(i915) >= PCH_MTL || display->platform.alderlake_p) { + if (INTEL_PCH_TYPE(display) >= PCH_MTL || display->platform.alderlake_p) { ddc_pin_map = adlp_ddc_pin_map; n_entries = ARRAY_SIZE(adlp_ddc_pin_map); } else if (display->platform.alderlake_s) { ddc_pin_map = adls_ddc_pin_map; n_entries = ARRAY_SIZE(adls_ddc_pin_map); - } else if (INTEL_PCH_TYPE(i915) >= PCH_DG1) { + } else if (INTEL_PCH_TYPE(display) >= PCH_DG1) { return vbt_pin; - } else if (display->platform.rocketlake && INTEL_PCH_TYPE(i915) == PCH_TGP) { + } else if (display->platform.rocketlake && INTEL_PCH_TYPE(display) == PCH_TGP) { ddc_pin_map = rkl_pch_tgp_ddc_pin_map; n_entries = ARRAY_SIZE(rkl_pch_tgp_ddc_pin_map); - } else if (HAS_PCH_TGP(i915) && DISPLAY_VER(display) == 9) { + } else if (HAS_PCH_TGP(display) && DISPLAY_VER(display) == 9) { ddc_pin_map = gen9bc_tgp_ddc_pin_map; n_entries = ARRAY_SIZE(gen9bc_tgp_ddc_pin_map); - } else if (INTEL_PCH_TYPE(i915) >= PCH_ICP) { + } else if (INTEL_PCH_TYPE(display) >= PCH_ICP) { ddc_pin_map = 
icp_ddc_pin_map; n_entries = ARRAY_SIZE(icp_ddc_pin_map); - } else if (HAS_PCH_CNP(i915)) { + } else if (HAS_PCH_CNP(display)) { ddc_pin_map = cnp_ddc_pin_map; n_entries = ARRAY_SIZE(cnp_ddc_pin_map); } else { @@ -2865,8 +2864,6 @@ parse_general_definitions(struct intel_display *display) static void init_vbt_defaults(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - display->vbt.crt_ddc_pin = GMBUS_PIN_VGADDC; /* general features */ @@ -2883,7 +2880,7 @@ init_vbt_defaults(struct intel_display *display) * clock for LVDS. */ display->vbt.lvds_ssc_freq = intel_bios_ssc_frequency(display, - !HAS_PCH_SPLIT(i915)); + !HAS_PCH_SPLIT(display)); drm_dbg_kms(display->drm, "Set default to SSC at %d kHz\n", display->vbt.lvds_ssc_freq); } @@ -3126,7 +3123,7 @@ static const struct vbt_header *intel_bios_get_vbt(struct intel_display *display * If the OpRegion does not have VBT, look in SPI flash * through MMIO or PCI mapping */ - if (!vbt && IS_DGFX(i915)) + if (!vbt && display->platform.dgfx) with_intel_display_rpm(display) vbt = oprom_get_vbt(display, intel_rom_spi(i915), sizep, "SPI flash"); diff --git a/drivers/gpu/drm/i915/display/intel_bios.h b/drivers/gpu/drm/i915/display/intel_bios.h index f9841f0498c6..6cd7a011b8c4 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.h +++ b/drivers/gpu/drm/i915/display/intel_bios.h @@ -24,7 +24,7 @@ /* * Please use intel_vbt_defs.h for VBT private data, to hide and abstract away * the VBT from the rest of the driver. Add the parsed, clean data to struct - * intel_vbt_data within struct drm_i915_private. + * intel_vbt_data within struct intel_display. */ #ifndef _INTEL_BIOS_H_ diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 6830950aae3f..b1718b491ffd 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -3494,7 +3494,6 @@ static int dg1_rawclk(struct intel_display *display) static int cnp_rawclk(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); int divider, fraction; u32 rawclk; @@ -3514,7 +3513,7 @@ static int cnp_rawclk(struct intel_display *display) rawclk |= CNP_RAWCLK_DEN(DIV_ROUND_CLOSEST(numerator * 1000, fraction) - 1); - if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + if (INTEL_PCH_TYPE(display) >= PCH_ICP) rawclk |= ICP_RAWCLK_NUM(numerator); } @@ -3553,21 +3552,20 @@ static int i9xx_hrawclk(struct intel_display *display) */ u32 intel_read_rawclk(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 freq; - if (INTEL_PCH_TYPE(dev_priv) >= PCH_MTL) + if (INTEL_PCH_TYPE(display) >= PCH_MTL) /* * MTL always uses a 38.4 MHz rawclk. The bspec tells us * "RAWCLK_FREQ defaults to the values for 38.4 and does * not need to be programmed." 
*/ freq = 38400; - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_DG1) + else if (INTEL_PCH_TYPE(display) >= PCH_DG1) freq = dg1_rawclk(display); - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_CNP) + else if (INTEL_PCH_TYPE(display) >= PCH_CNP) freq = cnp_rawclk(display); - else if (HAS_PCH_SPLIT(dev_priv)) + else if (HAS_PCH_SPLIT(display)) freq = pch_rawclk(display); else if (display->platform.valleyview || display->platform.cherryview) freq = vlv_hrawclk(display); diff --git a/drivers/gpu/drm/i915/display/intel_cmtg.c b/drivers/gpu/drm/i915/display/intel_cmtg.c index 07d7f4e8f60f..82606ebae1de 100644 --- a/drivers/gpu/drm/i915/display/intel_cmtg.c +++ b/drivers/gpu/drm/i915/display/intel_cmtg.c @@ -9,7 +9,6 @@ #include <drm/drm_device.h> #include <drm/drm_print.h> -#include "i915_drv.h" #include "i915_reg.h" #include "intel_crtc.h" #include "intel_cmtg.h" diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index cca22d2402e8..38b50a779b6b 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -31,9 +31,9 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> #include <drm/drm_edid.h> +#include <drm/drm_print.h> #include <drm/drm_probe_helper.h> -#include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" #include "intel_connector.h" @@ -91,13 +91,12 @@ static struct intel_crt *intel_attached_crt(struct intel_connector *connector) bool intel_crt_port_enabled(struct intel_display *display, i915_reg_t adpa_reg, enum pipe *pipe) { - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 val; val = intel_de_read(display, adpa_reg); /* asserts want to know the pipe even if the port is disabled */ - if (HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_CPT(display)) *pipe = REG_FIELD_GET(ADPA_PIPE_SEL_MASK_CPT, val); else *pipe = REG_FIELD_GET(ADPA_PIPE_SEL_MASK, val); @@ -177,7 +176,6 @@ static void intel_crt_set_dpms(struct intel_encoder *encoder, int mode) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crt *crt = intel_encoder_to_crt(encoder); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; @@ -194,14 +192,14 @@ static void intel_crt_set_dpms(struct intel_encoder *encoder, adpa |= ADPA_VSYNC_ACTIVE_HIGH; /* For CPT allow 3 pipe config, for others just use A or B */ - if (HAS_PCH_LPT(dev_priv)) + if (HAS_PCH_LPT(display)) ; /* Those bits don't exist here */ - else if (HAS_PCH_CPT(dev_priv)) + else if (HAS_PCH_CPT(display)) adpa |= ADPA_PIPE_SEL_CPT(crtc->pipe); else adpa |= ADPA_PIPE_SEL(crtc->pipe); - if (!HAS_PCH_SPLIT(dev_priv)) + if (!HAS_PCH_SPLIT(display)) intel_de_write(display, BCLRPAT(display, crtc->pipe), 0); switch (mode) { @@ -356,7 +354,6 @@ intel_crt_mode_valid(struct drm_connector *connector, const struct drm_display_mode *mode) { struct intel_display *display = to_intel_display(connector->dev); - struct drm_i915_private *dev_priv = to_i915(connector->dev); int max_dotclk = display->cdclk.max_dotclk_freq; enum drm_mode_status status; int max_clock; @@ -368,9 +365,9 @@ intel_crt_mode_valid(struct drm_connector *connector, if (mode->clock < 25000) return MODE_CLOCK_LOW; - if (HAS_PCH_LPT(dev_priv)) + if (HAS_PCH_LPT(display)) max_clock = 180000; - else if (IS_VALLEYVIEW(dev_priv)) + else if (display->platform.valleyview) /* * 270 MHz due to current DPLL limits, * DAC limit supposedly 355 MHz. 
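[Editor's note, not part of the patch: the intel_read_rawclk() hunk above keeps the existing newest-first cascade over INTEL_PCH_TYPE(), only switching its argument to display. The sketch below shows why the ordering of the ">=" checks matters, assuming PCH type values are ordered oldest to newest; the enum values and the non-MTL frequencies are placeholders, only the 38.4 MHz MTL value comes from the hunk's own comment.]

	#include <stdio.h>

	/* placeholder ordering; the real enum lives in the driver */
	enum pch_type { PCH_IBX = 1, PCH_CNP = 5, PCH_ICP = 6, PCH_DG1 = 8, PCH_MTL = 10 };

	static int read_rawclk_khz(enum pch_type pch)
	{
		if (pch >= PCH_MTL)
			return 38400;	/* MTL: fixed 38.4 MHz per the bspec note in the hunk */
		else if (pch >= PCH_DG1)
			return 38400;	/* dg1_rawclk() path, value illustrative */
		else if (pch >= PCH_CNP)
			return 19200;	/* cnp_rawclk() path, value illustrative */
		else
			return 125000;	/* pch_rawclk() path, value illustrative */
	}

	int main(void)
	{
		printf("MTL rawclk: %d kHz\n", read_rawclk_khz(PCH_MTL));
		printf("ICP rawclk: %d kHz\n", read_rawclk_khz(PCH_ICP));
		return 0;
	}

Because each branch uses ">=", a check for an older PCH placed first would shadow every newer platform, so new platforms must always be added at the top of the cascade.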
@@ -387,7 +384,7 @@ intel_crt_mode_valid(struct drm_connector *connector, return MODE_CLOCK_HIGH; /* The FDI receiver on LPT only supports 8bpc and only has 2 lanes. */ - if (HAS_PCH_LPT(dev_priv) && + if (HAS_PCH_LPT(display) && ilk_get_lanes_required(mode->clock, 270000, 24) > 2) return MODE_CLOCK_HIGH; @@ -438,7 +435,6 @@ static int hsw_crt_compute_config(struct intel_encoder *encoder, struct drm_connector_state *conn_state) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; @@ -457,7 +453,7 @@ static int hsw_crt_compute_config(struct intel_encoder *encoder, crtc_state->output_format = INTEL_OUTPUT_FORMAT_RGB; /* LPT FDI RX only supports 8bpc. */ - if (HAS_PCH_LPT(dev_priv)) { + if (HAS_PCH_LPT(display)) { /* TODO: Check crtc_state->max_link_bpp_x16 instead of bw_constrained */ if (crtc_state->bw_constrained && crtc_state->pipe_bpp < 24) { drm_dbg_kms(display->drm, @@ -482,13 +478,12 @@ static bool ilk_crt_detect_hotplug(struct drm_connector *connector) { struct intel_display *display = to_intel_display(connector->dev); struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector)); - struct drm_i915_private *dev_priv = to_i915(connector->dev); u32 adpa; bool ret; /* The first time through, trigger an explicit detection cycle */ if (crt->force_hotplug_required) { - bool turn_off_dac = HAS_PCH_SPLIT(dev_priv); + bool turn_off_dac = HAS_PCH_SPLIT(display); u32 save_adpa; crt->force_hotplug_required = false; @@ -583,15 +578,14 @@ static bool valleyview_crt_detect_hotplug(struct drm_connector *connector) static bool intel_crt_detect_hotplug(struct drm_connector *connector) { struct intel_display *display = to_intel_display(connector->dev); - struct drm_i915_private *dev_priv = to_i915(connector->dev); u32 stat; bool ret = false; int i, tries = 0; - if (HAS_PCH_SPLIT(dev_priv)) + if (HAS_PCH_SPLIT(display)) return ilk_crt_detect_hotplug(connector); - if (IS_VALLEYVIEW(dev_priv)) + if (display->platform.valleyview) return valleyview_crt_detect_hotplug(connector); /* @@ -599,7 +593,7 @@ static bool intel_crt_detect_hotplug(struct drm_connector *connector) * to get a reliable result. */ - if (IS_G45(dev_priv)) + if (display->platform.g45) tries = 2; else tries = 1; @@ -940,7 +934,6 @@ out: static int intel_crt_get_modes(struct drm_connector *connector) { struct intel_display *display = to_intel_display(connector->dev); - struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_crt *crt = intel_attached_crt(to_intel_connector(connector)); struct intel_encoder *encoder = &crt->base; intel_wakeref_t wakeref; @@ -953,7 +946,7 @@ static int intel_crt_get_modes(struct drm_connector *connector) wakeref = intel_display_power_get(display, encoder->power_domain); ret = intel_crt_ddc_get_modes(connector, connector->ddc); - if (ret || !IS_G4X(dev_priv)) + if (ret || !display->platform.g4x) goto out; /* Try to probe digital port for output in DVI-I -> VGA mode. 
*/ @@ -1012,16 +1005,15 @@ static const struct drm_encoder_funcs intel_crt_enc_funcs = { void intel_crt_init(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_connector *connector; struct intel_crt *crt; i915_reg_t adpa_reg; u8 ddc_pin; u32 adpa; - if (HAS_PCH_SPLIT(dev_priv)) + if (HAS_PCH_SPLIT(display)) adpa_reg = PCH_ADPA; - else if (IS_VALLEYVIEW(dev_priv)) + else if (display->platform.valleyview) adpa_reg = VLV_ADPA; else adpa_reg = ADPA; @@ -1069,7 +1061,7 @@ void intel_crt_init(struct intel_display *display) crt->base.type = INTEL_OUTPUT_ANALOG; crt->base.cloneable = BIT(INTEL_OUTPUT_DVO) | BIT(INTEL_OUTPUT_HDMI); - if (IS_I830(dev_priv)) + if (display->platform.i830) crt->base.pipe_mask = BIT(PIPE_A); else crt->base.pipe_mask = ~0; @@ -1109,7 +1101,7 @@ void intel_crt_init(struct intel_display *display) intel_ddi_buf_trans_init(&crt->base); } else { - if (HAS_PCH_SPLIT(dev_priv)) { + if (HAS_PCH_SPLIT(display)) { crt->base.compute_config = pch_crt_compute_config; crt->base.disable = pch_disable_crt; crt->base.post_disable = pch_post_disable_crt; @@ -1131,7 +1123,7 @@ void intel_crt_init(struct intel_display *display) * polarity and link reversal bits or not, instead of relying on the * BIOS. */ - if (HAS_PCH_LPT(dev_priv)) { + if (HAS_PCH_LPT(display)) { u32 fdi_config = FDI_RX_POLARITY_REVERSED_LPT | FDI_RX_LINK_REVERSAL_OVERRIDE; diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c index 5b2603ef2ff7..29cfc38f12e0 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -124,7 +124,7 @@ void intel_crtc_vblank_on(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - crtc->block_dc_for_vblank = intel_psr_needs_block_dc_vblank(crtc_state); + crtc->vblank_psr_notify = intel_psr_needs_vblank_notification(crtc_state); assert_vblank_disabled(&crtc->base); drm_crtc_set_max_vblank_count(&crtc->base, @@ -154,9 +154,9 @@ void intel_crtc_vblank_off(const struct intel_crtc_state *crtc_state) drm_crtc_vblank_off(&crtc->base); assert_vblank_disabled(&crtc->base); - crtc->block_dc_for_vblank = false; + crtc->vblank_psr_notify = false; - flush_work(&display->irq.vblank_dc_work); + flush_work(&display->irq.vblank_notify_work); } struct intel_crtc_state *intel_crtc_state_alloc(struct intel_crtc *crtc) @@ -305,7 +305,6 @@ static const struct drm_crtc_funcs i8xx_crtc_funcs = { int intel_crtc_init(struct intel_display *display, enum pipe pipe) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_plane *primary, *cursor; const struct drm_crtc_funcs *funcs; struct intel_crtc *crtc; @@ -333,7 +332,7 @@ int intel_crtc_init(struct intel_display *display, enum pipe pipe) for_each_sprite(display, pipe, sprite) { struct intel_plane *plane; - if (DISPLAY_VER(dev_priv) >= 9) + if (DISPLAY_VER(display) >= 9) plane = skl_universal_plane_create(display, pipe, PLANE_2 + sprite); else plane = intel_sprite_plane_create(display, pipe, sprite); diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index 3276a5b4a9b0..2fec5ba58373 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -9,10 +9,11 @@ #include <drm/drm_blend.h> #include <drm/drm_damage_helper.h> #include <drm/drm_fourcc.h> +#include <drm/drm_print.h> #include <drm/drm_vblank.h> -#include "i915_drv.h" #include "i915_reg.h" 
+#include "i915_utils.h" #include "intel_atomic.h" #include "intel_atomic_plane.h" #include "intel_cursor.h" diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index 22595766eac5..b09f724c3046 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -6,8 +6,10 @@ #include <linux/log2.h> #include <linux/math64.h> -#include "i915_drv.h" +#include <drm/drm_print.h> + #include "i915_reg.h" +#include "i915_utils.h" #include "intel_cx0_phy.h" #include "intel_cx0_phy_regs.h" #include "intel_ddi.h" diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index b48ed5df7a96..74132c1d6385 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -30,11 +30,13 @@ #include <drm/display/drm_dp_helper.h> #include <drm/display/drm_scdc_helper.h> +#include <drm/drm_print.h> #include <drm/drm_privacy_screen_consumer.h> -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "icl_dsi.h" +#include "intel_alpm.h" #include "intel_audio.h" #include "intel_audio_regs.h" #include "intel_backlight.h" @@ -3553,6 +3555,7 @@ static void intel_ddi_disable_dp(struct intel_atomic_state *state, intel_dp->link.active = false; intel_psr_disable(intel_dp, old_crtc_state); + intel_alpm_disable(intel_dp); intel_edp_backlight_off(old_conn_state); /* Disable the decompression in DP Sink */ intel_dp_sink_disable_decompression(state, @@ -4902,9 +4905,7 @@ static enum hpd_pin tgl_hpd_pin(struct intel_display *display, enum port port) static enum hpd_pin rkl_hpd_pin(struct intel_display *display, enum port port) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - - if (HAS_PCH_TGP(dev_priv)) + if (HAS_PCH_TGP(display)) return tgl_hpd_pin(display, port); if (port >= PORT_TC1) @@ -4923,12 +4924,10 @@ static enum hpd_pin icl_hpd_pin(struct intel_display *display, enum port port) static enum hpd_pin ehl_hpd_pin(struct intel_display *display, enum port port) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - if (port == PORT_D) return HPD_PORT_A; - if (HAS_PCH_TGP(dev_priv)) + if (HAS_PCH_TGP(display)) return icl_hpd_pin(display, port); return HPD_PORT_A + port - PORT_A; @@ -4936,9 +4935,7 @@ static enum hpd_pin ehl_hpd_pin(struct intel_display *display, enum port port) static enum hpd_pin skl_hpd_pin(struct intel_display *display, enum port port) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - - if (HAS_PCH_TGP(dev_priv)) + if (HAS_PCH_TGP(display)) return icl_hpd_pin(display, port); return HPD_PORT_A + port - PORT_A; diff --git a/drivers/gpu/drm/i915/display/intel_de.h b/drivers/gpu/drm/i915/display/intel_de.h index 655467a6ba87..54ce3e4f8fd9 100644 --- a/drivers/gpu/drm/i915/display/intel_de.h +++ b/drivers/gpu/drm/i915/display/intel_de.h @@ -6,7 +6,6 @@ #ifndef __INTEL_DE_H__ #define __INTEL_DE_H__ -#include "intel_display_conversion.h" #include "intel_display_core.h" #include "intel_dmc_wl.h" #include "intel_dsb.h" @@ -19,7 +18,7 @@ static inline struct intel_uncore *__to_uncore(struct intel_display *display) } static inline u32 -__intel_de_read(struct intel_display *display, i915_reg_t reg) +intel_de_read(struct intel_display *display, i915_reg_t reg) { u32 val; @@ -31,7 +30,6 @@ __intel_de_read(struct intel_display *display, i915_reg_t reg) return val; } -#define intel_de_read(p,...) 
__intel_de_read(__to_intel_display(p), __VA_ARGS__) static inline u8 intel_de_read8(struct intel_display *display, i915_reg_t reg) @@ -66,7 +64,7 @@ intel_de_read64_2x32(struct intel_display *display, } static inline void -__intel_de_posting_read(struct intel_display *display, i915_reg_t reg) +intel_de_posting_read(struct intel_display *display, i915_reg_t reg) { intel_dmc_wl_get(display, reg); @@ -74,10 +72,9 @@ __intel_de_posting_read(struct intel_display *display, i915_reg_t reg) intel_dmc_wl_put(display, reg); } -#define intel_de_posting_read(p,...) __intel_de_posting_read(__to_intel_display(p), __VA_ARGS__) static inline void -__intel_de_write(struct intel_display *display, i915_reg_t reg, u32 val) +intel_de_write(struct intel_display *display, i915_reg_t reg, u32 val) { intel_dmc_wl_get(display, reg); @@ -85,7 +82,6 @@ __intel_de_write(struct intel_display *display, i915_reg_t reg, u32 val) intel_dmc_wl_put(display, reg); } -#define intel_de_write(p,...) __intel_de_write(__to_intel_display(p), __VA_ARGS__) static inline u32 __intel_de_rmw_nowl(struct intel_display *display, i915_reg_t reg, @@ -95,8 +91,7 @@ __intel_de_rmw_nowl(struct intel_display *display, i915_reg_t reg, } static inline u32 -__intel_de_rmw(struct intel_display *display, i915_reg_t reg, u32 clear, - u32 set) +intel_de_rmw(struct intel_display *display, i915_reg_t reg, u32 clear, u32 set) { u32 val; @@ -108,7 +103,6 @@ __intel_de_rmw(struct intel_display *display, i915_reg_t reg, u32 clear, return val; } -#define intel_de_rmw(p,...) __intel_de_rmw(__to_intel_display(p), __VA_ARGS__) static inline int __intel_de_wait_for_register_nowl(struct intel_display *display, diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 404dd4a8b7df..6f0a0bc71b06 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -55,6 +55,7 @@ #include "i9xx_plane.h" #include "i9xx_plane_regs.h" #include "i9xx_wm.h" +#include "intel_alpm.h" #include "intel_atomic.h" #include "intel_atomic_plane.h" #include "intel_audio.h" @@ -1043,14 +1044,13 @@ static void intel_post_plane_update(struct intel_atomic_state *state, struct intel_crtc *crtc) { struct intel_display *display = to_intel_display(state); - struct drm_i915_private *dev_priv = to_i915(state->base.dev); const struct intel_crtc_state *old_crtc_state = intel_atomic_get_old_crtc_state(state, crtc); const struct intel_crtc_state *new_crtc_state = intel_atomic_get_new_crtc_state(state, crtc); enum pipe pipe = crtc->pipe; - intel_frontbuffer_flip(dev_priv, new_crtc_state->fb_bits); + intel_frontbuffer_flip(display, new_crtc_state->fb_bits); if (new_crtc_state->update_wm_post && new_crtc_state->hw.active) intel_update_watermarks(display); @@ -1079,6 +1079,8 @@ static void intel_post_plane_update(struct intel_atomic_state *state, if (audio_enabling(old_crtc_state, new_crtc_state)) intel_encoders_audio_enable(state, crtc); + intel_alpm_post_plane_update(state, crtc); + intel_psr_post_plane_update(state, crtc); } @@ -1174,6 +1176,7 @@ static void intel_pre_plane_update(struct intel_atomic_state *state, intel_atomic_get_new_crtc_state(state, crtc); enum pipe pipe = crtc->pipe; + intel_alpm_pre_plane_update(state, crtc); intel_psr_pre_plane_update(state, crtc); if (intel_crtc_vrr_disabling(state, crtc)) { @@ -1281,7 +1284,7 @@ static void intel_pre_plane_update(struct intel_atomic_state *state, static void intel_crtc_disable_planes(struct intel_atomic_state *state, struct intel_crtc *crtc) { 
- struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(state); const struct intel_crtc_state *new_crtc_state = intel_atomic_get_new_crtc_state(state, crtc); unsigned int update_mask = new_crtc_state->update_planes; @@ -1303,7 +1306,7 @@ static void intel_crtc_disable_planes(struct intel_atomic_state *state, fb_bits |= plane->frontbuffer_bit; } - intel_frontbuffer_flip(dev_priv, fb_bits); + intel_frontbuffer_flip(display, fb_bits); } static void intel_encoders_update_prepare(struct intel_atomic_state *state) @@ -1510,7 +1513,6 @@ static void ilk_crtc_enable(struct intel_atomic_state *state, struct intel_display *display = to_intel_display(crtc); const struct intel_crtc_state *new_crtc_state = intel_atomic_get_new_crtc_state(state, crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; if (drm_WARN_ON(display->drm, crtc->active)) @@ -1562,7 +1564,7 @@ static void ilk_crtc_enable(struct intel_atomic_state *state, intel_encoders_enable(state, crtc); - if (HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_CPT(display)) intel_wait_for_pipe_scanline_moving(crtc); /* @@ -2404,14 +2406,6 @@ static int intel_crtc_compute_pipe_mode(struct intel_crtc_state *crtc_state) return 0; } -static bool intel_crtc_needs_wa_14015401596(const struct intel_crtc_state *crtc_state) -{ - struct intel_display *display = to_intel_display(crtc_state); - - return intel_vrr_possible(crtc_state) && crtc_state->has_psr && - IS_DISPLAY_VER(display, 13, 14); -} - static int intel_crtc_vblank_delay(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); @@ -2420,9 +2414,7 @@ static int intel_crtc_vblank_delay(const struct intel_crtc_state *crtc_state) if (!HAS_DSB(display)) return 0; - /* Wa_14015401596 */ - if (intel_crtc_needs_wa_14015401596(crtc_state)) - vblank_delay = max(vblank_delay, 1); + vblank_delay = max(vblank_delay, intel_psr_min_vblank_delay(crtc_state)); return vblank_delay; } @@ -2534,15 +2526,13 @@ intel_link_compute_m_n(u16 bits_per_pixel_x16, int nlanes, void intel_panel_sanitize_ssc(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - /* * There may be no VBT; and if the BIOS enabled SSC we can * just keep using it to avoid unnecessary flicker. Whereas if the * BIOS isn't using it, don't assume it will work even if the VBT * indicates as much. */ - if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) { + if (HAS_PCH_IBX(display) || HAS_PCH_CPT(display)) { bool bios_lvds_use_ssc = intel_de_read(display, PCH_DREF_CONTROL) & DREF_SSC1_ENABLE; @@ -2724,6 +2714,19 @@ static void intel_set_transcoder_timings(const struct intel_crtc_state *crtc_sta intel_de_write(display, TRANS_VTOTAL(display, pipe), VACTIVE(crtc_vdisplay - 1) | VTOTAL(crtc_vtotal - 1)); + + if (DISPLAY_VER(display) >= 30) { + /* + * Address issues for resolutions with high refresh rate that + * have small Hblank, specifically where Hblank is smaller than + * one MTP. Simulations indicate this will address the + * jitter issues that currently causes BS to be immediately + * followed by BE which DPRX devices are unable to handle. 
+ * https://groups.vesa.org/wg/DP/document/20494 + */ + intel_de_write(display, DP_MIN_HBLANK_CTL(cpu_transcoder), + crtc_state->min_hblank); + } } static void intel_set_transcoder_timings_lrr(const struct intel_crtc_state *crtc_state) @@ -2867,6 +2870,10 @@ static void intel_get_transcoder_timings(struct intel_crtc *crtc, adjusted_mode->crtc_vdisplay + intel_de_read(display, TRANS_SET_CONTEXT_LATENCY(display, cpu_transcoder)); + + if (DISPLAY_VER(display) >= 30) + pipe_config->min_hblank = intel_de_read(display, + DP_MIN_HBLANK_CTL(cpu_transcoder)); } static void intel_joiner_adjust_pipe_src(struct intel_crtc_state *crtc_state) @@ -5217,6 +5224,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_I(lane_count); PIPE_CONF_CHECK_X(lane_lat_optim_mask); + PIPE_CONF_CHECK_I(min_hblank); + if (HAS_DOUBLE_BUFFERED_M_N(display)) { if (!fastset || !pipe_config->update_m_n) PIPE_CONF_CHECK_M_N(dp_m_n); @@ -6541,7 +6550,6 @@ static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state, { struct intel_display *display = to_intel_display(new_crtc_state); struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); /* * Update pipe size and adjust fitter if needed: the reason for this is @@ -6557,7 +6565,7 @@ static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state, if (DISPLAY_VER(display) >= 9) { if (new_crtc_state->pch_pfit.enabled) skl_pfit_enable(new_crtc_state); - } else if (HAS_PCH_SPLIT(dev_priv)) { + } else if (HAS_PCH_SPLIT(display)) { if (new_crtc_state->pch_pfit.enabled) ilk_pfit_enable(new_crtc_state); else if (old_crtc_state->pch_pfit.enabled) @@ -6658,6 +6666,8 @@ static void intel_enable_crtc(struct intel_atomic_state *state, intel_crtc_update_active_timings(pipe_crtc_state, false); } + intel_psr_notify_pipe_change(state, crtc, true); + display->funcs.display->crtc_enable(state, crtc); /* vblanks work again, re-enable pipe CRC. 
*/ @@ -6777,6 +6787,8 @@ static void intel_old_crtc_state_disables(struct intel_atomic_state *state, intel_crtc_joined_pipe_mask(old_crtc_state)) intel_crtc_disable_pipe_crc(pipe_crtc); + intel_psr_notify_pipe_change(state, crtc, false); + display->funcs.display->crtc_disable(state, crtc); for_each_intel_crtc_in_pipe_mask(display->drm, pipe_crtc, @@ -7633,15 +7645,13 @@ static bool ilk_has_edp_a(struct intel_display *display) static bool intel_ddi_crt_present(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - if (DISPLAY_VER(display) >= 9) return false; if (display->platform.haswell_ult || display->platform.broadwell_ult) return false; - if (HAS_PCH_LPT_H(dev_priv) && + if (HAS_PCH_LPT_H(display) && intel_de_read(display, SFUSE_STRAP) & SFUSE_STRAP_CRT_DISABLED) return false; @@ -7663,7 +7673,6 @@ bool assert_port_valid(struct intel_display *display, enum port port) void intel_setup_outputs(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_encoder *encoder; bool dpd_is_edp = false; @@ -7680,7 +7689,7 @@ void intel_setup_outputs(struct intel_display *display) if (display->platform.geminilake || display->platform.broxton) vlv_dsi_init(display); - } else if (HAS_PCH_SPLIT(dev_priv)) { + } else if (HAS_PCH_SPLIT(display)) { int found; /* @@ -8048,13 +8057,11 @@ static const struct intel_display_funcs i9xx_display_funcs = { */ void intel_init_display_hooks(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - if (DISPLAY_VER(display) >= 9) { display->funcs.display = &skl_display_funcs; } else if (HAS_DDI(display)) { display->funcs.display = &ddi_display_funcs; - } else if (HAS_PCH_SPLIT(dev_priv)) { + } else if (HAS_PCH_SPLIT(display)) { display->funcs.display = &pch_split_display_funcs; } else if (display->platform.cherryview || display->platform.valleyview) { diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h index eb6d6f2d0f75..b4937e102360 100644 --- a/drivers/gpu/drm/i915/display/intel_display_core.h +++ b/drivers/gpu/drm/i915/display/intel_display_core.h @@ -21,17 +21,15 @@ #include "intel_display_limits.h" #include "intel_display_params.h" #include "intel_display_power.h" +#include "intel_dmc_wl.h" #include "intel_dpll_mgr.h" #include "intel_fbc.h" #include "intel_global_state.h" #include "intel_gmbus.h" #include "intel_opregion.h" -#include "intel_dmc_wl.h" +#include "intel_pch.h" #include "intel_wm_types.h" -struct task_struct; - -struct drm_i915_private; struct drm_property; struct drm_property_blob; struct i915_audio_component; @@ -52,6 +50,7 @@ struct intel_hotplug_funcs; struct intel_initial_plane_config; struct intel_opregion; struct intel_overlay; +struct task_struct; /* Amount of SAGV/QGV points, BSpec precisely defines this */ #define I915_NUM_QGV_POINTS 8 @@ -180,7 +179,7 @@ struct intel_hotplug { /* * Queuing of hotplug_work, reenable_work and poll_init_work is - * enabled. Protected by drm_i915_private::irq_lock. + * enabled. Protected by intel_display::irq::lock. */ bool detection_work_enabled; @@ -289,6 +288,9 @@ struct intel_display { /* Platform (and subplatform, if any) identification */ struct intel_display_platforms platform; + /* Intel PCH: where the south display engine lives */ + enum intel_pch pch_type; + /* Display functions */ struct { /* Top level crtc-ish functions */ @@ -426,7 +428,7 @@ struct intel_display { * reused when sending message to gsc cs. 
* this is only populated post Meteorlake */ - struct intel_hdcp_gsc_message *hdcp_message; + struct intel_hdcp_gsc_context *gsc_context; /* Mutex to protect the above hdcp related values. */ struct mutex hdcp_mutex; } hdcp; @@ -454,6 +456,9 @@ struct intel_display { } ips; struct { + /* protects the irq masks */ + spinlock_t lock; + /* * Most platforms treat the display irq block as an always-on * power domain. vlv/chv can disable it at runtime and need @@ -466,9 +471,9 @@ struct intel_display { /* For i915gm/i945gm vblank irq workaround */ u8 vblank_enabled; - int vblank_wa_num_pipes; + int vblank_enable_count; - struct work_struct vblank_dc_work; + struct work_struct vblank_notify_work; u32 de_irq_mask[I915_MAX_PIPES]; u32 pipestat_irq_mask[I915_MAX_PIPES]; @@ -575,6 +580,8 @@ struct intel_display { struct intel_vbt_data vbt; struct intel_dmc_wl wl; struct intel_wm wm; + + struct work_struct psr_dc5_dc6_wa_work; }; #endif /* __INTEL_DISPLAY_CORE_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 4c208fdb9137..8d0a1779dd19 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -7,11 +7,12 @@ #include <linux/string_helpers.h> #include <drm/drm_debugfs.h> +#include <drm/drm_drv.h> #include <drm/drm_edid.h> +#include <drm/drm_file.h> #include <drm/drm_fourcc.h> #include "hsw_ips.h" -#include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" #include "i9xx_wm_regs.h" @@ -53,10 +54,9 @@ static struct intel_display *node_to_intel_display(struct drm_info_node *node) static int intel_display_caps(struct seq_file *m, void *data) { struct intel_display *display = node_to_intel_display(m->private); - struct drm_i915_private *i915 = to_i915(display->drm); struct drm_printer p = drm_seq_file_printer(m); - drm_printf(&p, "PCH type: %d\n", INTEL_PCH_TYPE(i915)); + drm_printf(&p, "PCH type: %d\n", INTEL_PCH_TYPE(display)); intel_display_device_info_print(DISPLAY_INFO(display), DISPLAY_RUNTIME_INFO(display), &p); @@ -85,7 +85,6 @@ static int i915_frontbuffer_tracking(struct seq_file *m, void *unused) static int i915_sr_status(struct seq_file *m, void *unused) { struct intel_display *display = node_to_intel_display(m->private); - struct drm_i915_private *dev_priv = to_i915(display->drm); intel_wakeref_t wakeref; bool sr_enabled = false; @@ -93,7 +92,7 @@ static int i915_sr_status(struct seq_file *m, void *unused) if (DISPLAY_VER(display) >= 9) /* no global SR status; inspect per-plane WM */; - else if (HAS_PCH_SPLIT(dev_priv)) + else if (HAS_PCH_SPLIT(display)) sr_enabled = intel_de_read(display, WM1_LP_ILK) & WM_LP_ENABLE; else if (display->platform.i965gm || display->platform.g4x || display->platform.i945g || display->platform.i945gm) @@ -558,6 +557,8 @@ static void intel_crtc_info(struct seq_file *m, struct intel_crtc *crtc) seq_printf(m, "\tpipe src=" DRM_RECT_FMT ", dither=%s, bpp=%d\n", DRM_RECT_ARG(&crtc_state->pipe_src), str_yes_no(crtc_state->dither), crtc_state->pipe_bpp); + seq_printf(m, "\tport_clock=%d, lane_count=%d\n", + crtc_state->port_clock, crtc_state->lane_count); intel_scaler_info(m, crtc); diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index 738ae522c8f4..90d714598664 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -3,11 +3,13 @@ * Copyright © 2023 Intel Corporation */ 
-#include <drm/intel/pciids.h> -#include <drm/drm_color_mgmt.h> #include <linux/pci.h> -#include "i915_drv.h" +#include <drm/drm_color_mgmt.h> +#include <drm/drm_drv.h> +#include <drm/drm_print.h> +#include <drm/intel/pciids.h> + #include "i915_reg.h" #include "intel_cx0_phy_regs.h" #include "intel_de.h" @@ -1711,7 +1713,6 @@ void intel_display_device_remove(struct intel_display *display) static void __intel_display_device_info_runtime_init(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_display_runtime_info *display_runtime = DISPLAY_RUNTIME_INFO(display); enum pipe pipe; @@ -1775,7 +1776,7 @@ static void __intel_display_device_info_runtime_init(struct intel_display *displ goto display_fused_off; } - if (IS_DISPLAY_VER(display, 7, 8) && HAS_PCH_SPLIT(i915)) { + if (IS_DISPLAY_VER(display, 7, 8) && HAS_PCH_SPLIT(display)) { u32 fuse_strap = intel_de_read(display, FUSE_STRAP); u32 sfuse_strap = intel_de_read(display, SFUSE_STRAP); @@ -1790,7 +1791,7 @@ static void __intel_display_device_info_runtime_init(struct intel_display *displ */ if (fuse_strap & ILK_INTERNAL_DISPLAY_DISABLE || sfuse_strap & SFUSE_STRAP_DISPLAY_DISABLED || - (HAS_PCH_CPT(i915) && + (HAS_PCH_CPT(display) && !(sfuse_strap & SFUSE_STRAP_FUSE_LOCK))) { drm_info(display->drm, "Display fused off, disabling\n"); diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index efee8925987e..5c74ab5fd1aa 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -54,6 +54,7 @@ #include "intel_plane_initial.h" #include "intel_pmdemand.h" #include "intel_pps.h" +#include "intel_psr.h" #include "intel_quirks.h" #include "intel_vga.h" #include "intel_wm.h" @@ -180,6 +181,9 @@ static void intel_plane_possible_crtcs_init(struct intel_display *display) void intel_display_driver_early_probe(struct intel_display *display) { + /* This must be called before any calls to HAS_PCH_* */ + intel_pch_detect(display); + if (!HAS_DISPLAY(display)) return; @@ -223,6 +227,8 @@ int intel_display_driver_probe_noirq(struct intel_display *display) if (ret) goto cleanup_bios; + intel_psr_dc5_dc6_wa_init(display); + /* FIXME: completely on the wrong abstraction layer */ ret = intel_power_domains_init(display); if (ret < 0) @@ -415,7 +421,6 @@ bool intel_display_driver_check_access(struct intel_display *display) /* part #2: call after irq install, but before gem init */ int intel_display_driver_probe_nogem(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); enum pipe pipe; int ret; @@ -453,8 +458,6 @@ int intel_display_driver_probe_nogem(struct intel_display *display) intel_hti_init(display); - /* Just disable it once at startup */ - intel_vga_disable(display); intel_setup_outputs(display); ret = intel_dp_tunnel_mgr_init(display); @@ -464,7 +467,7 @@ int intel_display_driver_probe_nogem(struct intel_display *display) intel_display_driver_disable_user_access(display); drm_modeset_lock_all(display->drm); - intel_modeset_setup_hw_state(i915, display->drm->mode_config.acquire_ctx); + intel_modeset_setup_hw_state(display, display->drm->mode_config.acquire_ctx); intel_acpi_assign_connector_fwnodes(display); drm_modeset_unlock_all(display->drm); @@ -525,7 +528,6 @@ int intel_display_driver_probe(struct intel_display *display) void intel_display_driver_register(struct intel_display *display) { - struct drm_i915_private *i915 = 
to_i915(display->drm); struct drm_printer p = drm_dbg_printer(display->drm, DRM_UT_KMS, "i915 display info:"); @@ -552,7 +554,7 @@ void intel_display_driver_register(struct intel_display *display) drm_kms_helper_poll_init(display->drm); intel_hpd_poll_disable(display); - intel_fbdev_setup(i915); + intel_fbdev_setup(display); intel_display_device_info_print(DISPLAY_INFO(display), DISPLAY_RUNTIME_INFO(display), &p); @@ -687,13 +689,11 @@ __intel_display_driver_resume(struct intel_display *display, struct drm_atomic_state *state, struct drm_modeset_acquire_ctx *ctx) { - struct drm_i915_private *i915 = to_i915(display->drm); struct drm_crtc_state *crtc_state; struct drm_crtc *crtc; int ret, i; - intel_modeset_setup_hw_state(i915, ctx); - intel_vga_redisable(display); + intel_modeset_setup_hw_state(display, ctx); if (!state) return 0; diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index d2a35e3630b1..3e73832e5e81 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -5,7 +5,6 @@ #include <drm/drm_vblank.h> -#include "gt/intel_rps.h" #include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" @@ -15,6 +14,7 @@ #include "intel_de.h" #include "intel_display_irq.h" #include "intel_display_rpm.h" +#include "intel_display_rps.h" #include "intel_display_trace.h" #include "intel_display_types.h" #include "intel_dmc_wl.h" @@ -135,7 +135,7 @@ void ilk_update_display_irq(struct intel_display *display, struct drm_i915_private *dev_priv = to_i915(display->drm); u32 new_val; - lockdep_assert_held(&dev_priv->irq_lock); + lockdep_assert_held(&display->irq.lock); drm_WARN_ON(display->drm, enabled_irq_mask & ~interrupt_mask); new_val = dev_priv->irq_mask; @@ -173,7 +173,7 @@ void bdw_update_port_irq(struct intel_display *display, u32 new_val; u32 old_val; - lockdep_assert_held(&dev_priv->irq_lock); + lockdep_assert_held(&display->irq.lock); drm_WARN_ON(display->drm, enabled_irq_mask & ~interrupt_mask); @@ -206,7 +206,7 @@ static void bdw_update_pipe_irq(struct intel_display *display, struct drm_i915_private *dev_priv = to_i915(display->drm); u32 new_val; - lockdep_assert_held(&dev_priv->irq_lock); + lockdep_assert_held(&display->irq.lock); drm_WARN_ON(display->drm, enabled_irq_mask & ~interrupt_mask); @@ -254,7 +254,7 @@ void ibx_display_interrupt_update(struct intel_display *display, drm_WARN_ON(display->drm, enabled_irq_mask & ~interrupt_mask); - lockdep_assert_held(&dev_priv->irq_lock); + lockdep_assert_held(&display->irq.lock); if (drm_WARN_ON(display->drm, !intel_irqs_enabled(dev_priv))) return; @@ -276,11 +276,10 @@ void ibx_disable_display_interrupt(struct intel_display *display, u32 bits) u32 i915_pipestat_enable_mask(struct intel_display *display, enum pipe pipe) { - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 status_mask = display->irq.pipestat_irq_mask[pipe]; u32 enable_mask = status_mask << 16; - lockdep_assert_held(&dev_priv->irq_lock); + lockdep_assert_held(&display->irq.lock); if (DISPLAY_VER(display) < 5) goto out; @@ -329,7 +328,7 @@ void i915_enable_pipestat(struct intel_display *display, "pipe %c: status_mask=0x%x\n", pipe_name(pipe), status_mask); - lockdep_assert_held(&dev_priv->irq_lock); + lockdep_assert_held(&display->irq.lock); drm_WARN_ON(display->drm, !intel_irqs_enabled(dev_priv)); if ((display->irq.pipestat_irq_mask[pipe] & status_mask) == status_mask) @@ -353,7 +352,7 @@ void i915_disable_pipestat(struct intel_display *display, 
"pipe %c: status_mask=0x%x\n", pipe_name(pipe), status_mask); - lockdep_assert_held(&dev_priv->irq_lock); + lockdep_assert_held(&display->irq.lock); drm_WARN_ON(display->drm, !intel_irqs_enabled(dev_priv)); if ((display->irq.pipestat_irq_mask[pipe] & status_mask) == 0) @@ -377,28 +376,23 @@ static bool i915_has_legacy_blc_interrupt(struct intel_display *display) return IS_DISPLAY_VER(display, 3, 4) && display->platform.mobile; } -/** - * i915_enable_asle_pipestat - enable ASLE pipestat for OpRegion - * @display: display device - */ -void i915_enable_asle_pipestat(struct intel_display *display) +/* enable ASLE pipestat for OpRegion */ +static void i915_enable_asle_pipestat(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - if (!intel_opregion_asle_present(display)) return; if (!i915_has_legacy_blc_interrupt(display)) return; - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); i915_enable_pipestat(display, PIPE_B, PIPE_LEGACY_BLC_EVENT_STATUS); if (DISPLAY_VER(display) >= 4) i915_enable_pipestat(display, PIPE_A, PIPE_LEGACY_BLC_EVENT_STATUS); - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); } #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -517,14 +511,13 @@ static void i9xx_pipestat_irq_reset(struct intel_display *display) void i9xx_pipestat_irq_ack(struct intel_display *display, u32 iir, u32 pipe_stats[I915_MAX_PIPES]) { - struct drm_i915_private *dev_priv = to_i915(display->drm); enum pipe pipe; - spin_lock(&dev_priv->irq_lock); + spin_lock(&display->irq.lock); if ((display->platform.valleyview || display->platform.cherryview) && !display->irq.vlv_display_irqs_enabled) { - spin_unlock(&dev_priv->irq_lock); + spin_unlock(&display->irq.lock); return; } @@ -579,7 +572,7 @@ void i9xx_pipestat_irq_ack(struct intel_display *display, intel_de_write(display, reg, enable_mask); } } - spin_unlock(&dev_priv->irq_lock); + spin_unlock(&display->irq.lock); } void i915_pipestat_irq_handler(struct intel_display *display, @@ -879,7 +872,6 @@ static void ilk_gtt_fault_irq_handler(struct intel_display *display) void ilk_display_irq_handler(struct intel_display *display, u32 de_iir) { - struct drm_i915_private *dev_priv = to_i915(display->drm); enum pipe pipe; u32 hotplug_trigger = de_iir & DE_DP_A_HOTPLUG; @@ -916,7 +908,7 @@ void ilk_display_irq_handler(struct intel_display *display, u32 de_iir) if (de_iir & DE_PCH_EVENT) { u32 pch_iir = intel_de_read(display, SDEIIR); - if (HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_CPT(display)) cpt_irq_handler(display, pch_iir); else ibx_irq_handler(display, pch_iir); @@ -926,12 +918,11 @@ void ilk_display_irq_handler(struct intel_display *display, u32 de_iir) } if (DISPLAY_VER(display) == 5 && de_iir & DE_PCU_EVENT) - gen5_rps_irq_handler(&to_gt(dev_priv)->rps); + ilk_display_rps_irq_handler(display); } void ivb_display_irq_handler(struct intel_display *display, u32 de_iir) { - struct drm_i915_private *dev_priv = to_i915(display->drm); enum pipe pipe; u32 hotplug_trigger = de_iir & DE_DP_A_HOTPLUG_IVB; @@ -969,7 +960,7 @@ void ivb_display_irq_handler(struct intel_display *display, u32 de_iir) } /* check event from PCH */ - if (!HAS_PCH_NOP(dev_priv) && (de_iir & DE_PCH_EVENT_IVB)) { + if (!HAS_PCH_NOP(display) && (de_iir & DE_PCH_EVENT_IVB)) { u32 pch_iir = intel_de_read(display, SDEIIR); cpt_irq_handler(display, pch_iir); @@ -1311,7 +1302,6 @@ static u32 gen8_de_pipe_flip_done_mask(struct intel_display *display) static void gen8_read_and_ack_pch_irqs(struct intel_display *display, u32 
*pch_iir, u32 *pica_iir) { - struct drm_i915_private *i915 = to_i915(display->drm); u32 pica_ier = 0; *pica_iir = 0; @@ -1325,7 +1315,7 @@ static void gen8_read_and_ack_pch_irqs(struct intel_display *display, u32 *pch_i * their flags both in the PICA and SDE IIR. */ if (*pch_iir & SDE_PICAINTERRUPT) { - drm_WARN_ON(display->drm, INTEL_PCH_TYPE(i915) < PCH_MTL); + drm_WARN_ON(display->drm, INTEL_PCH_TYPE(display) < PCH_MTL); pica_ier = intel_de_rmw(display, PICAINTERRUPT_IER, ~0, 0); *pica_iir = intel_de_read(display, PICAINTERRUPT_IIR); @@ -1340,7 +1330,6 @@ static void gen8_read_and_ack_pch_irqs(struct intel_display *display, u32 *pch_i void gen8_de_irq_handler(struct intel_display *display, u32 master_ctl) { - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 iir; enum pipe pipe; @@ -1465,7 +1454,7 @@ void gen8_de_irq_handler(struct intel_display *display, u32 master_ctl) pipe, fault_errors); } - if (HAS_PCH_SPLIT(dev_priv) && !HAS_PCH_NOP(dev_priv) && + if (HAS_PCH_SPLIT(display) && !HAS_PCH_NOP(display) && master_ctl & GEN8_DE_PCH_IRQ) { u32 pica_iir; @@ -1479,9 +1468,9 @@ void gen8_de_irq_handler(struct intel_display *display, u32 master_ctl) if (pica_iir) xelpdp_pica_irq_handler(display, pica_iir); - if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + if (INTEL_PCH_TYPE(display) >= PCH_ICP) icp_irq_handler(display, iir); - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT) + else if (INTEL_PCH_TYPE(display) >= PCH_SPT) spt_irq_handler(display, iir); else cpt_irq_handler(display, iir); @@ -1573,13 +1562,12 @@ void i915gm_irq_cstate_wa(struct intel_display *display, bool enable) int i8xx_enable_vblank(struct drm_crtc *crtc) { struct intel_display *display = to_intel_display(crtc->dev); - struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum pipe pipe = to_intel_crtc(crtc)->pipe; unsigned long irqflags; - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + spin_lock_irqsave(&display->irq.lock, irqflags); i915_enable_pipestat(display, pipe, PIPE_VBLANK_INTERRUPT_STATUS); - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + spin_unlock_irqrestore(&display->irq.lock, irqflags); return 0; } @@ -1587,13 +1575,12 @@ int i8xx_enable_vblank(struct drm_crtc *crtc) void i8xx_disable_vblank(struct drm_crtc *crtc) { struct intel_display *display = to_intel_display(crtc->dev); - struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum pipe pipe = to_intel_crtc(crtc)->pipe; unsigned long irqflags; - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + spin_lock_irqsave(&display->irq.lock, irqflags); i915_disable_pipestat(display, pipe, PIPE_VBLANK_INTERRUPT_STATUS); - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + spin_unlock_irqrestore(&display->irq.lock, irqflags); } int i915gm_enable_vblank(struct drm_crtc *crtc) @@ -1617,14 +1604,13 @@ void i915gm_disable_vblank(struct drm_crtc *crtc) int i965_enable_vblank(struct drm_crtc *crtc) { struct intel_display *display = to_intel_display(crtc->dev); - struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum pipe pipe = to_intel_crtc(crtc)->pipe; unsigned long irqflags; - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + spin_lock_irqsave(&display->irq.lock, irqflags); i915_enable_pipestat(display, pipe, PIPE_START_VBLANK_INTERRUPT_STATUS); - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + spin_unlock_irqrestore(&display->irq.lock, irqflags); return 0; } @@ -1632,28 +1618,26 @@ int i965_enable_vblank(struct drm_crtc *crtc) void i965_disable_vblank(struct drm_crtc *crtc) { struct intel_display *display = 
to_intel_display(crtc->dev); - struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum pipe pipe = to_intel_crtc(crtc)->pipe; unsigned long irqflags; - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + spin_lock_irqsave(&display->irq.lock, irqflags); i915_disable_pipestat(display, pipe, PIPE_START_VBLANK_INTERRUPT_STATUS); - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + spin_unlock_irqrestore(&display->irq.lock, irqflags); } int ilk_enable_vblank(struct drm_crtc *crtc) { struct intel_display *display = to_intel_display(crtc->dev); - struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum pipe pipe = to_intel_crtc(crtc)->pipe; unsigned long irqflags; u32 bit = DISPLAY_VER(display) >= 7 ? DE_PIPE_VBLANK_IVB(pipe) : DE_PIPE_VBLANK(pipe); - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + spin_lock_irqsave(&display->irq.lock, irqflags); ilk_enable_display_irq(display, bit); - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + spin_unlock_irqrestore(&display->irq.lock, irqflags); /* Even though there is no DMC, frame counter can get stuck when * PSR is active as no frames are generated. @@ -1667,15 +1651,14 @@ int ilk_enable_vblank(struct drm_crtc *crtc) void ilk_disable_vblank(struct drm_crtc *crtc) { struct intel_display *display = to_intel_display(crtc->dev); - struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum pipe pipe = to_intel_crtc(crtc)->pipe; unsigned long irqflags; u32 bit = DISPLAY_VER(display) >= 7 ? DE_PIPE_VBLANK_IVB(pipe) : DE_PIPE_VBLANK(pipe); - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + spin_lock_irqsave(&display->irq.lock, irqflags); ilk_disable_display_irq(display, bit); - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + spin_unlock_irqrestore(&display->irq.lock, irqflags); } static bool gen11_dsi_configure_te(struct intel_crtc *intel_crtc, @@ -1701,39 +1684,31 @@ static bool gen11_dsi_configure_te(struct intel_crtc *intel_crtc, return true; } -static void intel_display_vblank_dc_work(struct work_struct *work) +static void intel_display_vblank_notify_work(struct work_struct *work) { struct intel_display *display = - container_of(work, typeof(*display), irq.vblank_dc_work); - int vblank_wa_num_pipes = READ_ONCE(display->irq.vblank_wa_num_pipes); + container_of(work, typeof(*display), irq.vblank_notify_work); + int vblank_enable_count = READ_ONCE(display->irq.vblank_enable_count); - /* - * NOTE: intel_display_power_set_target_dc_state is used only by PSR - * code for DC3CO handling. DC3CO target state is currently disabled in - * PSR code. If DC3CO is taken into use we need take that into account - * here as well. - */ - intel_display_power_set_target_dc_state(display, vblank_wa_num_pipes ? 
DC_STATE_DISABLE : - DC_STATE_EN_UPTO_DC6); + intel_psr_notify_vblank_enable_disable(display, vblank_enable_count); } int bdw_enable_vblank(struct drm_crtc *_crtc) { struct intel_crtc *crtc = to_intel_crtc(_crtc); struct intel_display *display = to_intel_display(crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; unsigned long irqflags; if (gen11_dsi_configure_te(crtc, true)) return 0; - if (crtc->block_dc_for_vblank && display->irq.vblank_wa_num_pipes++ == 0) - schedule_work(&display->irq.vblank_dc_work); + if (crtc->vblank_psr_notify && display->irq.vblank_enable_count++ == 0) + schedule_work(&display->irq.vblank_notify_work); - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + spin_lock_irqsave(&display->irq.lock, irqflags); bdw_enable_pipe_irq(display, pipe, GEN8_PIPE_VBLANK); - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + spin_unlock_irqrestore(&display->irq.lock, irqflags); /* Even if there is no DMC, frame counter can get stuck when * PSR is active as no frames are generated, so check only for PSR. @@ -1748,19 +1723,18 @@ void bdw_disable_vblank(struct drm_crtc *_crtc) { struct intel_crtc *crtc = to_intel_crtc(_crtc); struct intel_display *display = to_intel_display(crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; unsigned long irqflags; if (gen11_dsi_configure_te(crtc, false)) return; - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + spin_lock_irqsave(&display->irq.lock, irqflags); bdw_disable_pipe_irq(display, pipe, GEN8_PIPE_VBLANK); - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + spin_unlock_irqrestore(&display->irq.lock, irqflags); - if (crtc->block_dc_for_vblank && --display->irq.vblank_wa_num_pipes == 0) - schedule_work(&display->irq.vblank_dc_work); + if (crtc->vblank_psr_notify && --display->irq.vblank_enable_count == 0) + schedule_work(&display->irq.vblank_notify_work); } static u32 vlv_dpinvgtt_pipe_fault_mask(enum pipe pipe) @@ -1894,8 +1868,10 @@ static void _vlv_display_irq_reset(struct intel_display *display) void vlv_display_irq_reset(struct intel_display *display) { + spin_lock_irq(&display->irq.lock); if (display->irq.vlv_display_irqs_enabled) _vlv_display_irq_reset(display); + spin_unlock_irq(&display->irq.lock); } void i9xx_display_irq_reset(struct intel_display *display) @@ -1908,22 +1884,48 @@ void i9xx_display_irq_reset(struct intel_display *display) i9xx_pipestat_irq_reset(display); } +void i915_display_irq_postinstall(struct intel_display *display) +{ + /* + * Interrupt setup is already guaranteed to be single-threaded, this is + * just to make the assert_spin_locked check happy. + */ + spin_lock_irq(&display->irq.lock); + i915_enable_pipestat(display, PIPE_A, PIPE_CRC_DONE_INTERRUPT_STATUS); + i915_enable_pipestat(display, PIPE_B, PIPE_CRC_DONE_INTERRUPT_STATUS); + spin_unlock_irq(&display->irq.lock); + + i915_enable_asle_pipestat(display); +} + +void i965_display_irq_postinstall(struct intel_display *display) +{ + /* + * Interrupt setup is already guaranteed to be single-threaded, this is + * just to make the assert_spin_locked check happy. 
+ */ + spin_lock_irq(&display->irq.lock); + i915_enable_pipestat(display, PIPE_A, PIPE_GMBUS_INTERRUPT_STATUS); + i915_enable_pipestat(display, PIPE_A, PIPE_CRC_DONE_INTERRUPT_STATUS); + i915_enable_pipestat(display, PIPE_B, PIPE_CRC_DONE_INTERRUPT_STATUS); + spin_unlock_irq(&display->irq.lock); + + i915_enable_asle_pipestat(display); +} + static u32 vlv_error_mask(void) { /* TODO enable other errors too? */ return VLV_ERROR_PAGE_TABLE; } -void vlv_display_irq_postinstall(struct intel_display *display) +static void _vlv_display_irq_postinstall(struct intel_display *display) { struct drm_i915_private *dev_priv = to_i915(display->drm); u32 pipestat_mask; u32 enable_mask; enum pipe pipe; - if (!display->irq.vlv_display_irqs_enabled) - return; - if (display->platform.cherryview) intel_de_write(display, DPINVGTT, DPINVGTT_STATUS_MASK_CHV | @@ -1960,8 +1962,30 @@ void vlv_display_irq_postinstall(struct intel_display *display) intel_display_irq_regs_init(display, VLV_IRQ_REGS, dev_priv->irq_mask, enable_mask); } +void vlv_display_irq_postinstall(struct intel_display *display) +{ + spin_lock_irq(&display->irq.lock); + if (display->irq.vlv_display_irqs_enabled) + _vlv_display_irq_postinstall(display); + spin_unlock_irq(&display->irq.lock); +} + +void ibx_display_irq_reset(struct intel_display *display) +{ + struct drm_i915_private *i915 = to_i915(display->drm); + + if (HAS_PCH_NOP(i915)) + return; + + gen2_irq_reset(to_intel_uncore(display->drm), SDE_IRQ_REGS); + + if (HAS_PCH_CPT(i915) || HAS_PCH_LPT(i915)) + intel_de_write(display, SERR_INT, 0xffffffff); +} + void gen8_display_irq_reset(struct intel_display *display) { + struct drm_i915_private *i915 = to_i915(display->drm); enum pipe pipe; if (!HAS_DISPLAY(display)) @@ -1977,11 +2001,13 @@ void gen8_display_irq_reset(struct intel_display *display) intel_display_irq_regs_reset(display, GEN8_DE_PORT_IRQ_REGS); intel_display_irq_regs_reset(display, GEN8_DE_MISC_IRQ_REGS); + + if (HAS_PCH_SPLIT(i915)) + ibx_display_irq_reset(display); } void gen11_display_irq_reset(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); enum pipe pipe; u32 trans_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C) | BIT(TRANSCODER_D); @@ -2026,7 +2052,7 @@ void gen11_display_irq_reset(struct intel_display *display) else intel_display_irq_regs_reset(display, GEN11_DE_HPD_IRQ_REGS); - if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + if (INTEL_PCH_TYPE(display) >= PCH_ICP) intel_display_irq_regs_reset(display, SDE_IRQ_REGS); } @@ -2038,10 +2064,10 @@ void gen8_irq_power_well_post_enable(struct intel_display *display, gen8_de_pipe_flip_done_mask(display); enum pipe pipe; - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); if (!intel_irqs_enabled(dev_priv)) { - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); return; } @@ -2050,7 +2076,7 @@ void gen8_irq_power_well_post_enable(struct intel_display *display, display->irq.de_irq_mask[pipe], ~display->irq.de_irq_mask[pipe] | extra_ier); - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); } void gen8_irq_power_well_pre_disable(struct intel_display *display, @@ -2059,17 +2085,17 @@ void gen8_irq_power_well_pre_disable(struct intel_display *display, struct drm_i915_private *dev_priv = to_i915(display->drm); enum pipe pipe; - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); if (!intel_irqs_enabled(dev_priv)) { - spin_unlock_irq(&dev_priv->irq_lock); + 
spin_unlock_irq(&display->irq.lock); return; } for_each_pipe_masked(display, pipe, pipe_mask) intel_display_irq_regs_reset(display, GEN8_DE_PIPE_IRQ_REGS(pipe)); - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); /* make sure we're done processing display irqs */ intel_synchronize_irq(dev_priv); @@ -2088,15 +2114,14 @@ void gen8_irq_power_well_pre_disable(struct intel_display *display, */ static void ibx_irq_postinstall(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 mask; - if (HAS_PCH_NOP(dev_priv)) + if (HAS_PCH_NOP(display)) return; - if (HAS_PCH_IBX(dev_priv)) + if (HAS_PCH_IBX(display)) mask = SDE_GMBUS | SDE_AUX_MASK | SDE_POISON; - else if (HAS_PCH_CPT(dev_priv) || HAS_PCH_LPT(dev_priv)) + else if (HAS_PCH_CPT(display) || HAS_PCH_LPT(display)) mask = SDE_GMBUS_CPT | SDE_AUX_MASK_CPT; else mask = SDE_GMBUS_CPT; @@ -2108,32 +2133,37 @@ void valleyview_enable_display_irqs(struct intel_display *display) { struct drm_i915_private *dev_priv = to_i915(display->drm); - lockdep_assert_held(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); if (display->irq.vlv_display_irqs_enabled) - return; + goto out; display->irq.vlv_display_irqs_enabled = true; if (intel_irqs_enabled(dev_priv)) { _vlv_display_irq_reset(display); - vlv_display_irq_postinstall(display); + _vlv_display_irq_postinstall(display); } + +out: + spin_unlock_irq(&display->irq.lock); } void valleyview_disable_display_irqs(struct intel_display *display) { struct drm_i915_private *dev_priv = to_i915(display->drm); - lockdep_assert_held(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); if (!display->irq.vlv_display_irqs_enabled) - return; + goto out; display->irq.vlv_display_irqs_enabled = false; if (intel_irqs_enabled(dev_priv)) _vlv_display_irq_reset(display); +out: + spin_unlock_irq(&display->irq.lock); } void ilk_de_irq_postinstall(struct intel_display *display) @@ -2184,8 +2214,6 @@ static void icp_irq_postinstall(struct intel_display *display); void gen8_de_irq_postinstall(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - u32 de_pipe_masked = gen8_de_pipe_fault_mask(display) | GEN8_PIPE_CDCLK_CRC_DONE; u32 de_pipe_enables; @@ -2201,9 +2229,9 @@ void gen8_de_irq_postinstall(struct intel_display *display) if (DISPLAY_VER(display) >= 14) mtp_irq_postinstall(display); - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + else if (INTEL_PCH_TYPE(display) >= PCH_ICP) icp_irq_postinstall(display); - else if (HAS_PCH_SPLIT(dev_priv)) + else if (HAS_PCH_SPLIT(display)) ibx_irq_postinstall(display); if (DISPLAY_VER(display) < 11) @@ -2323,9 +2351,40 @@ void dg1_de_irq_postinstall(struct intel_display *display) void intel_display_irq_init(struct intel_display *display) { + spin_lock_init(&display->irq.lock); + display->drm->vblank_disable_immediate = true; intel_hotplug_irq_init(display); - INIT_WORK(&display->irq.vblank_dc_work, intel_display_vblank_dc_work); + INIT_WORK(&display->irq.vblank_notify_work, + intel_display_vblank_notify_work); +} + +struct intel_display_irq_snapshot { + u32 derrmr; +}; + +struct intel_display_irq_snapshot * +intel_display_irq_snapshot_capture(struct intel_display *display) +{ + struct intel_display_irq_snapshot *snapshot; + + snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); + if (!snapshot) + return NULL; + + if (DISPLAY_VER(display) >= 6 && DISPLAY_VER(display) < 20 && !HAS_GMCH(display)) + snapshot->derrmr = intel_de_read(display, DERRMR); + + return snapshot; +} + 
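/*
 * Editor's illustrative sketch (not part of this patch): how a caller might
 * combine the two snapshot helpers introduced here. Only the capture/print
 * signatures and the kzalloc(GFP_ATOMIC) allocation come from the hunks
 * above; the wrapper function name and the error-capture context are
 * assumptions for illustration.
 */
static void example_record_display_irq_state(struct intel_display *display,
					     struct drm_printer *p)
{
	struct intel_display_irq_snapshot *snapshot;

	/* GFP_ATOMIC allocation, so capture is usable from error/atomic paths */
	snapshot = intel_display_irq_snapshot_capture(display);

	/* the print helper tolerates a NULL snapshot from a failed capture */
	intel_display_irq_snapshot_print(snapshot, p);

	kfree(snapshot);
}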
+void intel_display_irq_snapshot_print(const struct intel_display_irq_snapshot *snapshot, + struct drm_printer *p) +{ + if (!snapshot) + return; + + drm_printf(p, "DERRMR: 0x%08x\n", snapshot->derrmr); } diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.h b/drivers/gpu/drm/i915/display/intel_display_irq.h index f72727768351..c66db3851da4 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.h +++ b/drivers/gpu/drm/i915/display/intel_display_irq.h @@ -12,7 +12,9 @@ enum pipe; struct drm_crtc; +struct drm_printer; struct intel_display; +struct intel_display_irq_snapshot; void valleyview_enable_display_irqs(struct intel_display *display); void valleyview_disable_display_irqs(struct intel_display *display); @@ -54,10 +56,13 @@ u32 gen11_gu_misc_irq_ack(struct intel_display *display, const u32 master_ctl); void gen11_gu_misc_irq_handler(struct intel_display *display, const u32 iir); void i9xx_display_irq_reset(struct intel_display *display); +void ibx_display_irq_reset(struct intel_display *display); void vlv_display_irq_reset(struct intel_display *display); void gen8_display_irq_reset(struct intel_display *display); void gen11_display_irq_reset(struct intel_display *display); +void i915_display_irq_postinstall(struct intel_display *display); +void i965_display_irq_postinstall(struct intel_display *display); void vlv_display_irq_postinstall(struct intel_display *display); void ilk_de_irq_postinstall(struct intel_display *display); void gen8_de_irq_postinstall(struct intel_display *display); @@ -67,7 +72,6 @@ void dg1_de_irq_postinstall(struct intel_display *display); u32 i915_pipestat_enable_mask(struct intel_display *display, enum pipe pipe); void i915_enable_pipestat(struct intel_display *display, enum pipe pipe, u32 status_mask); void i915_disable_pipestat(struct intel_display *display, enum pipe pipe, u32 status_mask); -void i915_enable_asle_pipestat(struct intel_display *display); void i9xx_pipestat_irq_ack(struct intel_display *display, u32 iir, u32 pipe_stats[I915_MAX_PIPES]); @@ -82,4 +86,7 @@ void intel_display_irq_init(struct intel_display *display); void i915gm_irq_cstate_wa(struct intel_display *display, bool enable); +struct intel_display_irq_snapshot *intel_display_irq_snapshot_capture(struct intel_display *display); +void intel_display_irq_snapshot_print(const struct intel_display_irq_snapshot *snapshot, struct drm_printer *p); + #endif /* __INTEL_DISPLAY_IRQ_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index c78315eb44fa..16356523816f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -323,6 +323,35 @@ unlock: mutex_unlock(&power_domains->lock); } +/** + * intel_display_power_get_current_dc_state - Get the current DC state. + * @display: display device + * + * This function returns DC_STATE_DISABLE while the "DC off" power well is + * enabled, and otherwise the target_dc_state that will be allowed once the + * "DC off" power well is disabled. + */ +u32 intel_display_power_get_current_dc_state(struct intel_display *display) +{ + struct i915_power_well *power_well; + struct i915_power_domains *power_domains = &display->power.domains; + u32 current_dc_state = DC_STATE_DISABLE; + + mutex_lock(&power_domains->lock); + power_well = lookup_power_well(display, SKL_DISP_DC_OFF); + + if (drm_WARN_ON(display->drm, !power_well)) + goto unlock; + + current_dc_state = intel_power_well_is_enabled(display, power_well) ?
+ DC_STATE_DISABLE : power_domains->target_dc_state; + +unlock: + mutex_unlock(&power_domains->lock); + + return current_dc_state; +} + static void __async_put_domains_mask(struct i915_power_domains *power_domains, struct intel_power_domain_mask *mask) { @@ -1365,11 +1394,9 @@ static void hsw_restore_lcpll(struct intel_display *display) */ static void hsw_enable_pc8(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - drm_dbg_kms(display->drm, "Enabling package C8+\n"); - if (HAS_PCH_LPT_LP(dev_priv)) + if (HAS_PCH_LPT_LP(display)) intel_de_rmw(display, SOUTH_DSPCLK_GATE_D, PCH_LP_PARTITION_LEVEL_DISABLE, 0); @@ -1415,14 +1442,13 @@ static void intel_pch_reset_handshake(struct intel_display *display, static void skl_display_core_init(struct intel_display *display, bool resume) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct i915_power_domains *power_domains = &display->power.domains; struct i915_power_well *well; gen9_set_dc_state(display, DC_STATE_DISABLE); /* enable PCH reset handshake */ - intel_pch_reset_handshake(display, !HAS_PCH_NOP(dev_priv)); + intel_pch_reset_handshake(display, !HAS_PCH_NOP(display)); if (!HAS_DISPLAY(display)) return; @@ -1624,20 +1650,19 @@ static void tgl_bw_buddy_init(struct intel_display *display) static void icl_display_core_init(struct intel_display *display, bool resume) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct i915_power_domains *power_domains = &display->power.domains; struct i915_power_well *well; gen9_set_dc_state(display, DC_STATE_DISABLE); /* Wa_14011294188:ehl,jsl,tgl,rkl,adl-s */ - if (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP && - INTEL_PCH_TYPE(dev_priv) < PCH_DG1) + if (INTEL_PCH_TYPE(display) >= PCH_TGP && + INTEL_PCH_TYPE(display) < PCH_DG1) intel_de_rmw(display, SOUTH_DSPCLK_GATE_D, 0, PCH_DPMGUNIT_CLOCK_GATE_DISABLE); /* 1. Enable PCH reset handshake. 
*/ - intel_pch_reset_handshake(display, !HAS_PCH_NOP(dev_priv)); + intel_pch_reset_handshake(display, !HAS_PCH_NOP(display)); if (!HAS_DISPLAY(display)) return; @@ -1908,7 +1933,6 @@ static void intel_power_domains_verify_state(struct intel_display *display); */ void intel_power_domains_init_hw(struct intel_display *display, bool resume) { - struct drm_i915_private *i915 = to_i915(display->drm); struct i915_power_domains *power_domains = &display->power.domains; power_domains->initializing = true; @@ -1932,9 +1956,9 @@ void intel_power_domains_init_hw(struct intel_display *display, bool resume) assert_isp_power_gated(display); } else if (display->platform.broadwell || display->platform.haswell) { hsw_assert_cdclk(display); - intel_pch_reset_handshake(display, !HAS_PCH_NOP(i915)); + intel_pch_reset_handshake(display, !HAS_PCH_NOP(display)); } else if (display->platform.ivybridge) { - intel_pch_reset_handshake(display, !HAS_PCH_NOP(i915)); + intel_pch_reset_handshake(display, !HAS_PCH_NOP(display)); } /* @@ -2229,8 +2253,6 @@ static void intel_power_domains_verify_state(struct intel_display *display) void intel_display_power_suspend_late(struct intel_display *display, bool s2idle) { - struct drm_i915_private *i915 = to_i915(display->drm); - intel_power_domains_suspend(display, s2idle); if (DISPLAY_VER(display) >= 11 || display->platform.geminilake || @@ -2241,14 +2263,12 @@ void intel_display_power_suspend_late(struct intel_display *display, bool s2idle } /* Tweaked Wa_14010685332:cnp,icp,jsp,mcc,tgp,adp */ - if (INTEL_PCH_TYPE(i915) >= PCH_CNP && INTEL_PCH_TYPE(i915) < PCH_DG1) - intel_de_rmw(i915, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, SBCLK_RUN_REFCLK_DIS); + if (INTEL_PCH_TYPE(display) >= PCH_CNP && INTEL_PCH_TYPE(display) < PCH_DG1) + intel_de_rmw(display, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, SBCLK_RUN_REFCLK_DIS); } void intel_display_power_resume_early(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - if (DISPLAY_VER(display) >= 11 || display->platform.geminilake || display->platform.broxton) { gen9_sanitize_dc_state(display); @@ -2258,8 +2278,8 @@ void intel_display_power_resume_early(struct intel_display *display) } /* Tweaked Wa_14010685332:cnp,icp,jsp,mcc,tgp,adp */ - if (INTEL_PCH_TYPE(i915) >= PCH_CNP && INTEL_PCH_TYPE(i915) < PCH_DG1) - intel_de_rmw(i915, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, 0); + if (INTEL_PCH_TYPE(display) >= PCH_CNP && INTEL_PCH_TYPE(display) < PCH_DG1) + intel_de_rmw(display, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, 0); intel_power_domains_resume(display); } diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h index 1b53d67f9b60..f8813b0e16df 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -183,6 +183,7 @@ void intel_display_power_suspend(struct intel_display *display); void intel_display_power_resume(struct intel_display *display); void intel_display_power_set_target_dc_state(struct intel_display *display, u32 state); +u32 intel_display_power_get_current_dc_state(struct intel_display *display); bool intel_display_power_is_enabled(struct intel_display *display, enum intel_display_power_domain domain); diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.c b/drivers/gpu/drm/i915/display/intel_display_power_well.c index b9b4359751cc..b104bce0e14d 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_well.c +++ 
b/drivers/gpu/drm/i915/display/intel_display_power_well.c @@ -25,6 +25,7 @@ #include "intel_hotplug.h" #include "intel_pcode.h" #include "intel_pps.h" +#include "intel_psr.h" #include "intel_tc.h" #include "intel_vga.h" #include "skl_watermark.h" @@ -760,6 +761,9 @@ void gen9_set_dc_state(struct intel_display *display, u32 state) state & ~power_domains->allowed_dc_mask)) state &= power_domains->allowed_dc_mask; + if (!power_domains->initializing) + intel_psr_notify_dc5_dc6(display); + val = intel_de_read(display, DC_STATE_EN); mask = gen9_dc_mask(display); drm_dbg_kms(display->drm, "Setting DC state from %02x to %02x\n", @@ -1208,7 +1212,6 @@ static void vlv_init_display_clock_gating(struct intel_display *display) static void vlv_display_power_well_init(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_encoder *encoder; enum pipe pipe; @@ -1232,9 +1235,7 @@ static void vlv_display_power_well_init(struct intel_display *display) vlv_init_display_clock_gating(display); - spin_lock_irq(&dev_priv->irq_lock); valleyview_enable_display_irqs(display); - spin_unlock_irq(&dev_priv->irq_lock); /* * During driver initialization/resume we can avoid restoring the @@ -1252,7 +1253,7 @@ static void vlv_display_power_well_init(struct intel_display *display) intel_crt_reset(&encoder->base); } - intel_vga_redisable_power_on(display); + intel_vga_disable(display); intel_pps_unlock_regs_wa(display); } @@ -1261,9 +1262,7 @@ static void vlv_display_power_well_deinit(struct intel_display *display) { struct drm_i915_private *dev_priv = to_i915(display->drm); - spin_lock_irq(&dev_priv->irq_lock); valleyview_disable_display_irqs(display); - spin_unlock_irq(&dev_priv->irq_lock); /* make sure we're done processing display irqs */ intel_synchronize_irq(dev_priv); diff --git a/drivers/gpu/drm/i915/display/intel_display_rps.c b/drivers/gpu/drm/i915/display/intel_display_rps.c index 4074a1879828..678b24115951 100644 --- a/drivers/gpu/drm/i915/display/intel_display_rps.c +++ b/drivers/gpu/drm/i915/display/intel_display_rps.c @@ -8,6 +8,8 @@ #include "gt/intel_rps.h" #include "i915_drv.h" +#include "i915_reg.h" +#include "intel_display_irq.h" #include "intel_display_rps.h" #include "intel_display_types.h" @@ -81,3 +83,24 @@ void intel_display_rps_mark_interactive(struct intel_display *display, intel_rps_mark_interactive(&to_gt(i915)->rps, interactive); state->rps_interactive = interactive; } + +void ilk_display_rps_enable(struct intel_display *display) +{ + spin_lock(&display->irq.lock); + ilk_enable_display_irq(display, DE_PCU_EVENT); + spin_unlock(&display->irq.lock); +} + +void ilk_display_rps_disable(struct intel_display *display) +{ + spin_lock(&display->irq.lock); + ilk_disable_display_irq(display, DE_PCU_EVENT); + spin_unlock(&display->irq.lock); +} + +void ilk_display_rps_irq_handler(struct intel_display *display) +{ + struct drm_i915_private *i915 = to_i915(display->drm); + + gen5_rps_irq_handler(&to_gt(i915)->rps); +} diff --git a/drivers/gpu/drm/i915/display/intel_display_rps.h b/drivers/gpu/drm/i915/display/intel_display_rps.h index 556891edb2dd..183d154f2c7c 100644 --- a/drivers/gpu/drm/i915/display/intel_display_rps.h +++ b/drivers/gpu/drm/i915/display/intel_display_rps.h @@ -13,10 +13,34 @@ struct drm_crtc; struct intel_atomic_state; struct intel_display; +#ifdef I915 void intel_display_rps_boost_after_vblank(struct drm_crtc *crtc, struct dma_fence *fence); void intel_display_rps_mark_interactive(struct intel_display *display, struct intel_atomic_state 
*state, bool interactive); +void ilk_display_rps_enable(struct intel_display *display); +void ilk_display_rps_disable(struct intel_display *display); +void ilk_display_rps_irq_handler(struct intel_display *display); +#else +static inline void intel_display_rps_boost_after_vblank(struct drm_crtc *crtc, + struct dma_fence *fence) +{ +} +static inline void intel_display_rps_mark_interactive(struct intel_display *display, + struct intel_atomic_state *state, + bool interactive) +{ +} +static inline void ilk_display_rps_enable(struct intel_display *display) +{ +} +static inline void ilk_display_rps_disable(struct intel_display *display) +{ +} +static inline void ilk_display_rps_irq_handler(struct intel_display *display) +{ +} +#endif #endif /* __INTEL_DISPLAY_RPS_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_display_snapshot.c b/drivers/gpu/drm/i915/display/intel_display_snapshot.c index 25ba043cbb65..66087302fdbc 100644 --- a/drivers/gpu/drm/i915/display/intel_display_snapshot.c +++ b/drivers/gpu/drm/i915/display/intel_display_snapshot.c @@ -7,6 +7,7 @@ #include "intel_display_core.h" #include "intel_display_device.h" +#include "intel_display_irq.h" #include "intel_display_params.h" #include "intel_display_snapshot.h" #include "intel_dmc.h" @@ -20,6 +21,7 @@ struct intel_display_snapshot { struct intel_display_params params; struct intel_overlay_snapshot *overlay; struct intel_dmc_snapshot *dmc; + struct intel_display_irq_snapshot *irq; }; struct intel_display_snapshot *intel_display_snapshot_capture(struct intel_display *display) @@ -38,6 +40,7 @@ struct intel_display_snapshot *intel_display_snapshot_capture(struct intel_displ intel_display_params_copy(&snapshot->params); + snapshot->irq = intel_display_irq_snapshot_capture(display); snapshot->overlay = intel_overlay_snapshot_capture(display); snapshot->dmc = intel_dmc_snapshot_capture(display); @@ -57,6 +60,7 @@ void intel_display_snapshot_print(const struct intel_display_snapshot *snapshot, intel_display_device_info_print(&snapshot->info, &snapshot->runtime_info, p); intel_display_params_dump(&snapshot->params, display->drm->driver->name, p); + intel_display_irq_snapshot_print(snapshot->irq, p); intel_overlay_snapshot_print(snapshot->overlay, p); intel_dmc_snapshot_print(snapshot->dmc, p); } @@ -68,6 +72,7 @@ void intel_display_snapshot_free(struct intel_display_snapshot *snapshot) intel_display_params_free(&snapshot->params); + kfree(snapshot->irq); kfree(snapshot->overlay); kfree(snapshot->dmc); kfree(snapshot); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 94468a9d2e0d..d6d0440dcee9 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1114,6 +1114,7 @@ struct intel_crtc_state { bool wm_level_disabled; u32 dc3co_exitline; u16 su_y_granularity; + u8 active_non_psr_pipes; /* * Frequency the dpll for the port should run at. Differs from the @@ -1387,7 +1388,7 @@ struct intel_crtc { /* armed event for DSB based updates */ struct drm_pending_vblank_event *dsb_event; - /* Access to these should be protected by dev_priv->irq_lock. */ + /* Access to these should be protected by display->irq.lock. 
*/ bool cpu_fifo_underrun_disabled; bool pch_fifo_underrun_disabled; @@ -1439,7 +1440,7 @@ struct intel_crtc { struct intel_pipe_crc pipe_crc; #endif - bool block_dc_for_vblank; + bool vblank_psr_notify; }; struct intel_plane_error { @@ -1650,6 +1651,8 @@ struct intel_psr { u8 entry_setup_frames; bool link_ok; + + u8 active_non_psr_pipes; }; struct intel_dp { @@ -1805,12 +1808,16 @@ struct intel_dp { struct { u8 io_wake_lines; u8 fast_wake_lines; + enum transcoder transcoder; + struct mutex lock; /* LNL and beyond */ u8 check_entry_lines; u8 aux_less_wake_lines; u8 silence_period_sym_clocks; u8 lfps_half_cycle_num_of_syms; + bool lobf_disable_debug; + bool sink_alpm_error; } alpm_parameters; u8 alpm_dpcd; diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 98f80a6c63e8..b58189d24e7e 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -173,7 +173,6 @@ MODULE_FIRMWARE(BXT_DMC_PATH); static const char *dmc_firmware_default(struct intel_display *display, u32 *size) { - struct drm_i915_private *i915 = to_i915(display->drm); const char *fw_path = NULL; u32 max_fw_size = 0; @@ -189,39 +188,39 @@ static const char *dmc_firmware_default(struct intel_display *display, u32 *size } else if (DISPLAY_VERx100(display) == 1400) { fw_path = MTL_DMC_PATH; max_fw_size = XELPDP_DMC_MAX_FW_SIZE; - } else if (IS_DG2(i915)) { + } else if (display->platform.dg2) { fw_path = DG2_DMC_PATH; max_fw_size = DISPLAY_VER13_DMC_MAX_FW_SIZE; - } else if (IS_ALDERLAKE_P(i915)) { + } else if (display->platform.alderlake_p) { fw_path = ADLP_DMC_PATH; max_fw_size = DISPLAY_VER13_DMC_MAX_FW_SIZE; - } else if (IS_ALDERLAKE_S(i915)) { + } else if (display->platform.alderlake_s) { fw_path = ADLS_DMC_PATH; max_fw_size = DISPLAY_VER12_DMC_MAX_FW_SIZE; - } else if (IS_DG1(i915)) { + } else if (display->platform.dg1) { fw_path = DG1_DMC_PATH; max_fw_size = DISPLAY_VER12_DMC_MAX_FW_SIZE; - } else if (IS_ROCKETLAKE(i915)) { + } else if (display->platform.rocketlake) { fw_path = RKL_DMC_PATH; max_fw_size = DISPLAY_VER12_DMC_MAX_FW_SIZE; - } else if (IS_TIGERLAKE(i915)) { + } else if (display->platform.tigerlake) { fw_path = TGL_DMC_PATH; max_fw_size = DISPLAY_VER12_DMC_MAX_FW_SIZE; } else if (DISPLAY_VER(display) == 11) { fw_path = ICL_DMC_PATH; max_fw_size = ICL_DMC_MAX_FW_SIZE; - } else if (IS_GEMINILAKE(i915)) { + } else if (display->platform.geminilake) { fw_path = GLK_DMC_PATH; max_fw_size = GLK_DMC_MAX_FW_SIZE; - } else if (IS_KABYLAKE(i915) || - IS_COFFEELAKE(i915) || - IS_COMETLAKE(i915)) { + } else if (display->platform.kabylake || + display->platform.coffeelake || + display->platform.cometlake) { fw_path = KBL_DMC_PATH; max_fw_size = KBL_DMC_MAX_FW_SIZE; - } else if (IS_SKYLAKE(i915)) { + } else if (display->platform.skylake) { fw_path = SKL_DMC_PATH; max_fw_size = SKL_DMC_MAX_FW_SIZE; - } else if (IS_BROXTON(i915)) { + } else if (display->platform.broxton) { fw_path = BXT_DMC_PATH; max_fw_size = BXT_DMC_MAX_FW_SIZE; } @@ -517,6 +516,54 @@ void intel_dmc_disable_pipe(struct intel_display *display, enum pipe pipe) intel_de_rmw(display, PIPEDMC_CONTROL(pipe), PIPEDMC_ENABLE, 0); } +/** + * intel_dmc_block_pkgc() - block PKG C-state + * @display: display instance + * @pipe: pipe whose PIPEDMC register is used for blocking + * @block: block/unblock + * + * This interface is intended for Wa_16025596647, i.e. to set/clear the + * PIPEDMC_BLOCK_PKGC_SW_BLOCK_PKGC_ALWAYS bit in the PIPEDMC_BLOCK_PKGC_SW register.
+ */ +void intel_dmc_block_pkgc(struct intel_display *display, enum pipe pipe, + bool block) +{ + intel_de_rmw(display, PIPEDMC_BLOCK_PKGC_SW(pipe), + PIPEDMC_BLOCK_PKGC_SW_BLOCK_PKGC_ALWAYS, block ? + PIPEDMC_BLOCK_PKGC_SW_BLOCK_PKGC_ALWAYS : 0); +} + +/** + * intel_dmc_start_pkgc_exit_at_start_of_undelayed_vblank() - start PKG + * C-state exit + * @display: display instance + * @pipe: pipe whose PIPEDMC event register is programmed + * @enable: enable/disable + * + * This interface is intended for Wa_16025596647, i.e. to start the package C + * exit at the start of the undelayed vblank. + */ +void intel_dmc_start_pkgc_exit_at_start_of_undelayed_vblank(struct intel_display *display, + enum pipe pipe, bool enable) +{ + u32 val; + + if (enable) + val = DMC_EVT_CTL_ENABLE | DMC_EVT_CTL_RECURRING | + REG_FIELD_PREP(DMC_EVT_CTL_TYPE_MASK, + DMC_EVT_CTL_TYPE_EDGE_0_1) | + REG_FIELD_PREP(DMC_EVT_CTL_EVENT_ID_MASK, + DMC_EVT_CTL_EVENT_ID_VBLANK_A); + else + val = REG_FIELD_PREP(DMC_EVT_CTL_EVENT_ID_MASK, + DMC_EVT_CTL_EVENT_ID_FALSE) | + REG_FIELD_PREP(DMC_EVT_CTL_TYPE_MASK, + DMC_EVT_CTL_TYPE_EDGE_0_1); + + intel_de_write(display, MTL_PIPEDMC_EVT_CTL_4(pipe), + val); +} + static bool is_dmc_evt_ctl_reg(struct intel_display *display, enum intel_dmc_id dmc_id, i915_reg_t reg) { @@ -541,8 +588,6 @@ static bool disable_dmc_evt(struct intel_display *display, enum intel_dmc_id dmc_id, i915_reg_t reg, u32 data) { - struct drm_i915_private *i915 = to_i915(display->drm); - if (!is_dmc_evt_ctl_reg(display, dmc_id, reg)) return false; @@ -551,12 +596,12 @@ static bool disable_dmc_evt(struct intel_display *display, return true; /* also disable the flip queue event on the main DMC on TGL */ - if (IS_TIGERLAKE(i915) && + if (display->platform.tigerlake && REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data) == DMC_EVT_CTL_EVENT_ID_CLK_MSEC) return true; /* also disable the HRR event on the main DMC on TGL/ADLS */ - if ((IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915)) && + if ((display->platform.tigerlake || display->platform.alderlake_s) && REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data) == DMC_EVT_CTL_EVENT_ID_VBLANK_A) return true; @@ -588,7 +633,6 @@ static u32 dmc_mmiodata(struct intel_display *display, */ void intel_dmc_load_program(struct intel_display *display) { - struct drm_i915_private *i915 __maybe_unused = to_i915(display->drm); struct i915_power_domains *power_domains = &display->power.domains; struct intel_dmc *dmc = display_to_dmc(display); enum intel_dmc_id dmc_id; @@ -1012,9 +1056,7 @@ static void intel_dmc_runtime_pm_put(struct intel_display *display) static const char *dmc_fallback_path(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - - if (IS_ALDERLAKE_P(i915)) + if (display->platform.alderlake_p) return ADLP_DMC_FALLBACK_PATH; return NULL; @@ -1279,7 +1321,6 @@ static bool intel_dmc_get_dc6_allowed_count(struct intel_display *display, u32 * static int intel_dmc_debugfs_status_show(struct seq_file *m, void *unused) { struct intel_display *display = m->private; - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_dmc *dmc = display_to_dmc(display); struct ref_tracker *wakeref; i915_reg_t dc5_reg, dc6_reg = INVALID_MMIO_REG; @@ -1299,7 +1340,7 @@ static int intel_dmc_debugfs_status_show(struct seq_file *m, void *unused) seq_printf(m, "Pipe A fw loaded: %s\n", str_yes_no(has_dmc_id_fw(display, DMC_FW_PIPEA))); seq_printf(m, "Pipe B fw needed: %s\n", - str_yes_no(IS_ALDERLAKE_P(i915) || + str_yes_no(display->platform.alderlake_p || DISPLAY_VER(display) >=
14)); seq_printf(m, "Pipe B fw loaded: %s\n", str_yes_no(has_dmc_id_fw(display, DMC_FW_PIPEB))); @@ -1313,7 +1354,7 @@ static int intel_dmc_debugfs_status_show(struct seq_file *m, void *unused) if (DISPLAY_VER(display) >= 12) { i915_reg_t dc3co_reg; - if (IS_DGFX(i915) || DISPLAY_VER(display) >= 14) { + if (display->platform.dgfx || DISPLAY_VER(display) >= 14) { dc3co_reg = DG1_DMC_DEBUG3; dc5_reg = DG1_DMC_DEBUG_DC5_COUNT; } else { @@ -1325,9 +1366,9 @@ static int intel_dmc_debugfs_status_show(struct seq_file *m, void *unused) seq_printf(m, "DC3CO count: %d\n", intel_de_read(display, dc3co_reg)); } else { - dc5_reg = IS_BROXTON(i915) ? BXT_DMC_DC3_DC5_COUNT : + dc5_reg = display->platform.broxton ? BXT_DMC_DC3_DC5_COUNT : SKL_DMC_DC3_DC5_COUNT; - if (!IS_GEMINILAKE(i915) && !IS_BROXTON(i915)) + if (!display->platform.geminilake && !display->platform.broxton) dc6_reg = SKL_DMC_DC5_DC6_COUNT; } diff --git a/drivers/gpu/drm/i915/display/intel_dmc.h b/drivers/gpu/drm/i915/display/intel_dmc.h index c78426eb4cd5..bd1c459b0075 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.h +++ b/drivers/gpu/drm/i915/display/intel_dmc.h @@ -18,6 +18,10 @@ void intel_dmc_load_program(struct intel_display *display); void intel_dmc_disable_program(struct intel_display *display); void intel_dmc_enable_pipe(struct intel_display *display, enum pipe pipe); void intel_dmc_disable_pipe(struct intel_display *display, enum pipe pipe); +void intel_dmc_block_pkgc(struct intel_display *display, enum pipe pipe, + bool block); +void intel_dmc_start_pkgc_exit_at_start_of_undelayed_vblank(struct intel_display *display, + enum pipe pipe, bool enable); void intel_dmc_fini(struct intel_display *display); void intel_dmc_suspend(struct intel_display *display); void intel_dmc_resume(struct intel_display *display); diff --git a/drivers/gpu/drm/i915/display/intel_dmc_regs.h b/drivers/gpu/drm/i915/display/intel_dmc_regs.h index 1bf446f96a10..e16ea3f16ed8 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc_regs.h +++ b/drivers/gpu/drm/i915/display/intel_dmc_regs.h @@ -21,6 +21,20 @@ #define MTL_PIPEDMC_CONTROL _MMIO(0x45250) #define PIPEDMC_ENABLE_MTL(pipe) REG_BIT(((pipe) - PIPE_A) * 4) +#define _MTL_PIPEDMC_EVT_CTL_4_A 0x5f044 +#define _MTL_PIPEDMC_EVT_CTL_4_B 0x5f444 +#define MTL_PIPEDMC_EVT_CTL_4(pipe) _MMIO_PIPE(pipe, \ + _MTL_PIPEDMC_EVT_CTL_4_A, \ + _MTL_PIPEDMC_EVT_CTL_4_B) + +#define PIPEDMC_BLOCK_PKGC_SW_A 0x5f1d0 +#define PIPEDMC_BLOCK_PKGC_SW_B 0x5F5d0 +#define PIPEDMC_BLOCK_PKGC_SW(pipe) _MMIO_PIPE(pipe, \ + PIPEDMC_BLOCK_PKGC_SW_A, \ + PIPEDMC_BLOCK_PKGC_SW_B) +#define PIPEDMC_BLOCK_PKGC_SW_BLOCK_PKGC_ALWAYS BIT(31) +#define PIPEDMC_BLOCK_PKGC_SW_BLOCK_PKGC_UNTIL_NEXT_FRAMESTART BIT(15) + #define _ADLP_PIPEDMC_REG_MMIO_BASE_A 0x5f000 #define _TGL_PIPEDMC_REG_MMIO_BASE_A 0x92000 diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index d7a30d0992b7..593b29b56714 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -45,12 +45,13 @@ #include <drm/drm_crtc.h> #include <drm/drm_edid.h> #include <drm/drm_fixed.h> +#include <drm/drm_print.h> #include <drm/drm_probe_helper.h> #include "g4x_dp.h" -#include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_alpm.h" #include "intel_atomic.h" #include "intel_audio.h" @@ -58,6 +59,7 @@ #include "intel_combo_phy_regs.h" #include "intel_connector.h" #include "intel_crtc.h" +#include "intel_crtc_state_dump.h" #include "intel_cx0_phy.h" #include 
"intel_ddi.h" #include "intel_de.h" @@ -92,7 +94,6 @@ #include "intel_tc.h" #include "intel_vdsc.h" #include "intel_vrr.h" -#include "intel_crtc_state_dump.h" /* DP DSC throughput values used for slice count calculations KPixels/s */ #define DP_DSC_PEAK_PIXEL_RATE 2720000 @@ -3104,6 +3105,76 @@ intel_dp_queue_modeset_retry_for_link(struct intel_atomic_state *state, } } +int intel_dp_compute_min_hblank(struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + const struct drm_display_mode *adjusted_mode = + &crtc_state->hw.adjusted_mode; + struct intel_connector *connector = to_intel_connector(conn_state->connector); + int symbol_size = intel_dp_is_uhbr(crtc_state) ? 32 : 8; + /* + * min symbol cycles is 3(BS,VBID, BE) for 128b/132b and + * 5(BS, VBID, MVID, MAUD, BE) for 8b/10b + */ + int min_sym_cycles = intel_dp_is_uhbr(crtc_state) ? 3 : 5; + bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); + int num_joined_pipes = intel_crtc_num_joined_pipes(crtc_state); + int min_hblank; + int max_lane_count = 4; + int hactive_sym_cycles, htotal_sym_cycles; + int dsc_slices = 0; + int link_bpp_x16; + + if (DISPLAY_VER(display) < 30) + return 0; + + /* MIN_HBLANK should be set only for 8b/10b MST or for 128b/132b SST/MST */ + if (!is_mst && !intel_dp_is_uhbr(crtc_state)) + return 0; + + if (crtc_state->dsc.compression_enable) { + dsc_slices = intel_dp_dsc_get_slice_count(connector, + adjusted_mode->crtc_clock, + adjusted_mode->crtc_hdisplay, + num_joined_pipes); + if (!dsc_slices) { + drm_dbg(display->drm, "failed to calculate dsc slice count\n"); + return -EINVAL; + } + } + + if (crtc_state->dsc.compression_enable) + link_bpp_x16 = crtc_state->dsc.compressed_bpp_x16; + else + link_bpp_x16 = fxp_q4_from_int(intel_dp_output_bpp(crtc_state->output_format, + crtc_state->pipe_bpp)); + + /* Calculate min Hblank Link Layer Symbol Cycle Count for 8b/10b MST & 128b/132b */ + hactive_sym_cycles = drm_dp_link_symbol_cycles(max_lane_count, + adjusted_mode->hdisplay, + dsc_slices, + link_bpp_x16, + symbol_size, is_mst); + htotal_sym_cycles = adjusted_mode->htotal * hactive_sym_cycles / + adjusted_mode->hdisplay; + + min_hblank = htotal_sym_cycles - hactive_sym_cycles; + /* minimum Hblank calculation: https://groups.vesa.org/wg/DP/document/20494 */ + min_hblank = max(min_hblank, min_sym_cycles); + + /* + * adjust the BlankingStart/BlankingEnd framing control from + * the calculated value + */ + min_hblank = min_hblank - 2; + + min_hblank = min(10, min_hblank); + crtc_state->min_hblank = min_hblank; + + return 0; +} + int intel_dp_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, @@ -3203,6 +3274,10 @@ intel_dp_compute_config(struct intel_encoder *encoder, &pipe_config->dp_m_n); } + ret = intel_dp_compute_min_hblank(pipe_config, conn_state); + if (ret) + return ret; + /* FIXME: abstract this better */ if (pipe_config->splitter.enable) pipe_config->dp_m_n.data_m *= pipe_config->splitter.link_count; @@ -5392,6 +5467,11 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) intel_psr_short_pulse(intel_dp); + if (intel_alpm_get_error(intel_dp)) { + intel_alpm_disable(intel_dp); + intel_dp->alpm_parameters.sink_alpm_error = true; + } + if (intel_dp_test_short_pulse(intel_dp)) reprobe_needed = true; @@ -5827,20 +5907,21 @@ out_vdd_off: } static void -intel_dp_force(struct drm_connector *connector) +intel_dp_force(struct drm_connector *_connector) { - struct intel_display *display 
= to_intel_display(connector->dev); - struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector)); + struct intel_connector *connector = to_intel_connector(_connector); + struct intel_display *display = to_intel_display(connector); + struct intel_dp *intel_dp = intel_attached_dp(connector); drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s]\n", - connector->base.id, connector->name); + connector->base.base.id, connector->base.name); if (!intel_display_driver_check_access(display)) return; intel_dp_unset_edid(intel_dp); - if (connector->status != connector_status_connected) + if (connector->base.status != connector_status_connected) return; intel_dp_set_edid(intel_dp); @@ -5879,24 +5960,25 @@ static int intel_dp_get_modes(struct drm_connector *_connector) } static int -intel_dp_connector_register(struct drm_connector *connector) +intel_dp_connector_register(struct drm_connector *_connector) { - struct intel_display *display = to_intel_display(connector->dev); - struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector)); + struct intel_connector *connector = to_intel_connector(_connector); + struct intel_display *display = to_intel_display(connector); + struct intel_dp *intel_dp = intel_attached_dp(connector); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); int ret; - ret = intel_connector_register(connector); + ret = intel_connector_register(&connector->base); if (ret) return ret; drm_dbg_kms(display->drm, "registering %s bus for %s\n", - intel_dp->aux.name, connector->kdev->kobj.name); + intel_dp->aux.name, connector->base.kdev->kobj.name); - intel_dp->aux.dev = connector->kdev; + intel_dp->aux.dev = connector->base.kdev; ret = drm_dp_aux_register(&intel_dp->aux); if (!ret) - drm_dp_cec_register_connector(&intel_dp->aux, connector); + drm_dp_cec_register_connector(&intel_dp->aux, &connector->base); if (!intel_bios_encoder_is_lspcon(dig_port->base.devdata)) return ret; @@ -5907,20 +5989,21 @@ intel_dp_connector_register(struct drm_connector *connector) */ if (intel_lspcon_init(dig_port)) { if (intel_lspcon_detect_hdr_capability(dig_port)) - drm_connector_attach_hdr_output_metadata_property(connector); + drm_connector_attach_hdr_output_metadata_property(&connector->base); } return ret; } static void -intel_dp_connector_unregister(struct drm_connector *connector) +intel_dp_connector_unregister(struct drm_connector *_connector) { - struct intel_dp *intel_dp = intel_attached_dp(to_intel_connector(connector)); + struct intel_connector *connector = to_intel_connector(_connector); + struct intel_dp *intel_dp = intel_attached_dp(connector); drm_dp_cec_unregister_connector(&intel_dp->aux); drm_dp_aux_unregister(&intel_dp->aux); - intel_connector_unregister(connector); + intel_connector_unregister(&connector->base); } void intel_dp_connector_sync_state(struct intel_connector *connector, @@ -5981,21 +6064,21 @@ static int intel_modeset_tile_group(struct intel_atomic_state *state, { struct intel_display *display = to_intel_display(state); struct drm_connector_list_iter conn_iter; - struct drm_connector *connector; + struct intel_connector *connector; int ret = 0; drm_connector_list_iter_begin(display->drm, &conn_iter); - drm_for_each_connector_iter(connector, &conn_iter) { + for_each_intel_connector_iter(connector, &conn_iter) { struct drm_connector_state *conn_state; struct intel_crtc_state *crtc_state; struct intel_crtc *crtc; - if (!connector->has_tile || - connector->tile_group->id != tile_group_id) + if (!connector->base.has_tile || + 
connector->base.tile_group->id != tile_group_id) continue; conn_state = drm_atomic_get_connector_state(&state->base, - connector); + &connector->base); if (IS_ERR(conn_state)) { ret = PTR_ERR(conn_state); break; @@ -6059,10 +6142,11 @@ static int intel_modeset_affected_transcoders(struct intel_atomic_state *state, } static int intel_modeset_synced_crtcs(struct intel_atomic_state *state, - struct drm_connector *connector) + struct drm_connector *_connector) { + struct intel_connector *connector = to_intel_connector(_connector); const struct drm_connector_state *old_conn_state = - drm_atomic_get_old_connector_state(&state->base, connector); + drm_atomic_get_old_connector_state(&state->base, &connector->base); const struct intel_crtc_state *old_crtc_state; struct intel_crtc *crtc; u8 transcoders; @@ -6084,17 +6168,18 @@ static int intel_modeset_synced_crtcs(struct intel_atomic_state *state, transcoders); } -static int intel_dp_connector_atomic_check(struct drm_connector *conn, +static int intel_dp_connector_atomic_check(struct drm_connector *_connector, struct drm_atomic_state *_state) { - struct intel_display *display = to_intel_display(conn->dev); + struct intel_connector *connector = to_intel_connector(_connector); + struct intel_display *display = to_intel_display(connector); struct intel_atomic_state *state = to_intel_atomic_state(_state); - struct drm_connector_state *conn_state = drm_atomic_get_new_connector_state(_state, conn); - struct intel_connector *intel_conn = to_intel_connector(conn); - struct intel_dp *intel_dp = enc_to_intel_dp(intel_conn->encoder); + struct drm_connector_state *conn_state = + drm_atomic_get_new_connector_state(_state, &connector->base); + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); int ret; - ret = intel_digital_connector_atomic_check(conn, &state->base); + ret = intel_digital_connector_atomic_check(&connector->base, &state->base); if (ret) return ret; @@ -6104,12 +6189,12 @@ static int intel_dp_connector_atomic_check(struct drm_connector *conn, return ret; } - if (!intel_connector_needs_modeset(state, conn)) + if (!intel_connector_needs_modeset(state, &connector->base)) return 0; ret = intel_dp_tunnel_atomic_check_state(state, intel_dp, - intel_conn); + connector); if (ret) return ret; @@ -6120,26 +6205,26 @@ static int intel_dp_connector_atomic_check(struct drm_connector *conn, if (DISPLAY_VER(display) < 9) return 0; - if (conn->has_tile) { - ret = intel_modeset_tile_group(state, conn->tile_group->id); + if (connector->base.has_tile) { + ret = intel_modeset_tile_group(state, connector->base.tile_group->id); if (ret) return ret; } - return intel_modeset_synced_crtcs(state, conn); + return intel_modeset_synced_crtcs(state, &connector->base); } -static void intel_dp_oob_hotplug_event(struct drm_connector *connector, +static void intel_dp_oob_hotplug_event(struct drm_connector *_connector, enum drm_connector_status hpd_state) { - struct intel_display *display = to_intel_display(connector->dev); - struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); - struct drm_i915_private *i915 = to_i915(connector->dev); + struct intel_connector *connector = to_intel_connector(_connector); + struct intel_display *display = to_intel_display(connector); + struct intel_encoder *encoder = intel_attached_encoder(connector); bool hpd_high = hpd_state == connector_status_connected; unsigned int hpd_pin = encoder->hpd_pin; bool need_work = false; - spin_lock_irq(&i915->irq_lock); + spin_lock_irq(&display->irq.lock); if (hpd_high 
!= test_bit(hpd_pin, &display->hotplug.oob_hotplug_last_state)) { display->hotplug.event_bits |= BIT(hpd_pin); @@ -6148,7 +6233,7 @@ static void intel_dp_oob_hotplug_event(struct drm_connector *connector, hpd_high); need_work = true; } - spin_unlock_irq(&i915->irq_lock); + spin_unlock_irq(&display->irq.lock); if (need_work) intel_hpd_schedule_detection(display); @@ -6280,36 +6365,37 @@ intel_dp_has_gamut_metadata_dip(struct intel_encoder *encoder) } static void -intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector) +intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *_connector) { + struct intel_connector *connector = to_intel_connector(_connector); struct intel_display *display = to_intel_display(intel_dp); enum port port = dp_to_dig_port(intel_dp)->base.port; if (!intel_dp_is_edp(intel_dp)) - drm_connector_attach_dp_subconnector_property(connector); + drm_connector_attach_dp_subconnector_property(&connector->base); if (!display->platform.g4x && port != PORT_A) - intel_attach_force_audio_property(connector); + intel_attach_force_audio_property(&connector->base); - intel_attach_broadcast_rgb_property(connector); + intel_attach_broadcast_rgb_property(&connector->base); if (HAS_GMCH(display)) - drm_connector_attach_max_bpc_property(connector, 6, 10); + drm_connector_attach_max_bpc_property(&connector->base, 6, 10); else if (DISPLAY_VER(display) >= 5) - drm_connector_attach_max_bpc_property(connector, 6, 12); + drm_connector_attach_max_bpc_property(&connector->base, 6, 12); /* Register HDMI colorspace for case of lspcon */ if (intel_bios_encoder_is_lspcon(dp_to_dig_port(intel_dp)->base.devdata)) { - drm_connector_attach_content_type_property(connector); - intel_attach_hdmi_colorspace_property(connector); + drm_connector_attach_content_type_property(&connector->base); + intel_attach_hdmi_colorspace_property(&connector->base); } else { - intel_attach_dp_colorspace_property(connector); + intel_attach_dp_colorspace_property(&connector->base); } if (intel_dp_has_gamut_metadata_dip(&dp_to_dig_port(intel_dp)->base)) - drm_connector_attach_hdr_output_metadata_property(connector); + drm_connector_attach_hdr_output_metadata_property(&connector->base); if (HAS_VRR(display)) - drm_connector_attach_vrr_capable_property(connector); + drm_connector_attach_vrr_capable_property(&connector->base); } static void @@ -6344,7 +6430,6 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, struct intel_connector *connector) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *dev_priv = to_i915(display->drm); struct drm_display_mode *fixed_mode; struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; bool has_dpcd; @@ -6361,7 +6446,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, */ if (intel_get_lvds_encoder(display)) { drm_WARN_ON(display->drm, - !(HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv))); + !(HAS_PCH_IBX(display) || HAS_PCH_CPT(display))); drm_info(display->drm, "LVDS was detected, not registering eDP\n"); @@ -6392,7 +6477,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, */ intel_hpd_enable_detection(encoder); - intel_alpm_init_dpcd(intel_dp); + intel_alpm_init(intel_dp); /* Cache DPCD and EDID for edp. 
*/ has_dpcd = intel_edp_init_dpcd(intel_dp, connector); diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 9189db4c2594..a7cc10800e0b 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -208,5 +208,7 @@ bool intel_dp_has_connector(struct intel_dp *intel_dp, const struct drm_connector_state *conn_state); int intel_dp_dsc_max_src_input_bpc(struct intel_display *display); int intel_dp_dsc_min_src_input_bpc(void); +int intel_dp_compute_min_hblank(struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); #endif /* __INTEL_DP_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c index 0496061203fb..bf8e8e0cc19c 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c @@ -3,8 +3,10 @@ * Copyright © 2020-2021 Intel Corporation */ -#include "i915_drv.h" +#include <drm/drm_print.h> + #include "i915_reg.h" +#include "i915_utils.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_dp.h" @@ -111,10 +113,9 @@ static u32 ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) static u32 hsw_get_aux_clock_divider(struct intel_dp *intel_dp, int index) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - if (dig_port->aux_ch != AUX_CH_A && HAS_PCH_LPT_H(i915)) { + if (dig_port->aux_ch != AUX_CH_A && HAS_PCH_LPT_H(display)) { /* Workaround for non-ULT HSW */ switch (index) { case 0: return 63; @@ -177,12 +178,11 @@ static u32 g4x_get_aux_send_ctl(struct intel_dp *intel_dp, int send_bytes, u32 aux_clock_divider) { - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + struct intel_display *display = to_intel_display(intel_dp); u32 timeout; /* Max timeout value on G4x-BDW: 1.6ms */ - if (IS_BROADWELL(i915)) + if (display->platform.broadwell) timeout = DP_AUX_CH_CTL_TIME_OUT_600us; else timeout = DP_AUX_CH_CTL_TIME_OUT_400us; @@ -786,7 +786,6 @@ void intel_dp_aux_fini(struct intel_dp *intel_dp) void intel_dp_aux_init(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct intel_encoder *encoder = &dig_port->base; enum aux_ch aux_ch = dig_port->aux_ch; @@ -801,10 +800,10 @@ void intel_dp_aux_init(struct intel_dp *intel_dp) } else if (DISPLAY_VER(display) >= 9) { intel_dp->aux_ch_ctl_reg = skl_aux_ctl_reg; intel_dp->aux_ch_data_reg = skl_aux_data_reg; - } else if (HAS_PCH_SPLIT(i915)) { + } else if (HAS_PCH_SPLIT(display)) { intel_dp->aux_ch_ctl_reg = ilk_aux_ctl_reg; intel_dp->aux_ch_data_reg = ilk_aux_data_reg; - } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + } else if (display->platform.valleyview || display->platform.cherryview) { intel_dp->aux_ch_ctl_reg = vlv_aux_ctl_reg; intel_dp->aux_ch_data_reg = vlv_aux_data_reg; } else { @@ -814,9 +813,9 @@ void intel_dp_aux_init(struct intel_dp *intel_dp) if (DISPLAY_VER(display) >= 9) intel_dp->get_aux_clock_divider = skl_get_aux_clock_divider; - else if (IS_BROADWELL(i915) || IS_HASWELL(i915)) + else if (display->platform.broadwell || display->platform.haswell) intel_dp->get_aux_clock_divider = hsw_get_aux_clock_divider; - else 
if (HAS_PCH_SPLIT(i915)) + else if (HAS_PCH_SPLIT(display)) intel_dp->get_aux_clock_divider = ilk_get_aux_clock_divider; else intel_dp->get_aux_clock_divider = g4x_get_aux_clock_divider; diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 20ab90acb351..271b27c9de51 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -36,7 +36,6 @@ #include <drm/drm_print.h> -#include "i915_utils.h" #include "intel_backlight.h" #include "intel_display_core.h" #include "intel_display_types.h" @@ -149,7 +148,7 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector) !(connector->base.hdr_sink_metadata.hdmi_type1.metadata_type & BIT(HDMI_STATIC_METADATA_TYPE1))) { drm_info(display->drm, - "[CONNECTOR:%d:%s] Panel is missing HDR static metadata. Possible support for Intel HDR backlight interface is not used. If your backlight controls don't work try booting with i915.enable_dpcd_backlight=%d. needs this, please file a _new_ bug report on drm/i915, see " FDO_BUG_URL " for details.\n", + "[CONNECTOR:%d:%s] Panel is missing HDR static metadata. Possible support for Intel HDR backlight interface is not used. If your backlight controls don't work try booting with i915.enable_dpcd_backlight=%d.\n", connector->base.base.id, connector->base.name, INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL); return false; diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 4c15dcb103aa..aeda59f5fa7a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -27,10 +27,11 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_edid.h> #include <drm/drm_fixed.h> +#include <drm/drm_print.h> #include <drm/drm_probe_helper.h> -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_atomic.h" #include "intel_audio.h" #include "intel_connector.h" @@ -51,6 +52,7 @@ #include "intel_link_bw.h" #include "intel_pfit.h" #include "intel_psr.h" +#include "intel_step.h" #include "intel_vdsc.h" #include "intel_vrr.h" #include "skl_scaler.h" @@ -239,26 +241,6 @@ static int intel_dp_mst_dsc_get_slice_count(const struct intel_connector *connec num_joined_pipes); } -static void intel_dp_mst_compute_min_hblank(struct intel_crtc_state *crtc_state, - int bpp_x16) -{ - struct intel_display *display = to_intel_display(crtc_state); - const struct drm_display_mode *adjusted_mode = - &crtc_state->hw.adjusted_mode; - int symbol_size = intel_dp_is_uhbr(crtc_state) ? 
32 : 8; - int hblank; - - if (DISPLAY_VER(display) < 20) - return; - - /* Calculate min Hblank Link Layer Symbol Cycle Count for 8b/10b MST & 128b/132b */ - hblank = DIV_ROUND_UP((DIV_ROUND_UP - (adjusted_mode->htotal - adjusted_mode->hdisplay, 4) * bpp_x16), - symbol_size); - - crtc_state->min_hblank = hblank; -} - int intel_dp_mtp_tu_compute_config(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state, @@ -329,8 +311,6 @@ int intel_dp_mtp_tu_compute_config(struct intel_dp *intel_dp, local_bw_overhead = intel_dp_mst_bw_overhead(crtc_state, false, dsc_slice_count, link_bpp_x16); - intel_dp_mst_compute_min_hblank(crtc_state, link_bpp_x16); - intel_dp_mst_compute_m_n(crtc_state, local_bw_overhead, link_bpp_x16, @@ -739,6 +719,10 @@ static int mst_stream_compute_config(struct intel_encoder *encoder, pipe_config->lane_lat_optim_mask = bxt_dpio_phy_calc_lane_lat_optim_mask(pipe_config->lane_count); + ret = intel_dp_compute_min_hblank(pipe_config, conn_state); + if (ret) + return ret; + intel_vrr_compute_config(pipe_config, conn_state); intel_dp_audio_compute_config(encoder, pipe_config, conn_state); @@ -1021,12 +1005,10 @@ static void mst_stream_disable(struct intel_atomic_state *state, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct intel_display *display = to_intel_display(state); struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_dp *intel_dp = to_primary_dp(encoder); struct intel_connector *connector = to_intel_connector(old_conn_state->connector); - enum transcoder trans = old_crtc_state->cpu_transcoder; if (intel_dp_mst_active_streams(intel_dp) == 1) intel_dp->link.active = false; @@ -1034,9 +1016,6 @@ static void mst_stream_disable(struct intel_atomic_state *state, intel_hdcp_disable(intel_mst->connector); intel_dp_sink_disable_decompression(state, connector, old_crtc_state); - - if (DISPLAY_VER(display) >= 20) - intel_de_write(display, DP_MIN_HBLANK_CTL(trans), 0); } static void mst_stream_post_disable(struct intel_atomic_state *state, @@ -1305,7 +1284,7 @@ static void mst_stream_enable(struct intel_atomic_state *state, enum transcoder trans = pipe_config->cpu_transcoder; bool first_mst_stream = intel_dp_mst_active_streams(intel_dp) == 1; struct intel_crtc *pipe_crtc; - int ret, i, min_hblank; + int ret, i; drm_WARN_ON(display->drm, pipe_config->has_pch_encoder); @@ -1320,29 +1299,6 @@ static void mst_stream_enable(struct intel_atomic_state *state, TRANS_DP2_VFREQ_PIXEL_CLOCK(crtc_clock_hz & 0xffffff)); } - if (DISPLAY_VER(display) >= 20) { - /* - * adjust the BlankingStart/BlankingEnd framing control from - * the calculated value - */ - min_hblank = pipe_config->min_hblank - 2; - - /* Maximum value to be programmed is limited to 0x10 */ - min_hblank = min(0x10, min_hblank); - - /* - * Minimum hblank accepted for 128b/132b would be 5 and for - * 8b/10b would be 3 symbol count - */ - if (intel_dp_is_uhbr(pipe_config)) - min_hblank = max(min_hblank, 5); - else - min_hblank = max(min_hblank, 3); - - intel_de_write(display, DP_MIN_HBLANK_CTL(trans), - min_hblank); - } - enable_bs_jitter_was(pipe_config); intel_ddi_enable_transcoder_func(encoder, pipe_config); diff --git a/drivers/gpu/drm/i915/display/intel_dpio_phy.c b/drivers/gpu/drm/i915/display/intel_dpio_phy.c index 429f89543789..69f242139420 100644 --- a/drivers/gpu/drm/i915/display/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.c @@ -222,9 +222,7 @@ static const struct 
bxt_dpio_phy_info glk_dpio_phy_info[] = { static const struct bxt_dpio_phy_info * bxt_get_phy_list(struct intel_display *display, int *count) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - - if (IS_GEMINILAKE(dev_priv)) { + if (display->platform.geminilake) { *count = ARRAY_SIZE(glk_dpio_phy_info); return glk_dpio_phy_info; } else { @@ -808,9 +806,9 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder, vlv_dpio_put(dev_priv); } -void chv_data_lane_soft_reset(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - bool reset) +static void __chv_data_lane_soft_reset(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + bool reset) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_digital_port *dig_port = enc_to_dig_port(encoder); @@ -853,6 +851,17 @@ void chv_data_lane_soft_reset(struct intel_encoder *encoder, } } +void chv_data_lane_soft_reset(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + bool reset) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + + vlv_dpio_get(i915); + __chv_data_lane_soft_reset(encoder, crtc_state, reset); + vlv_dpio_put(i915); +} + void chv_phy_pre_pll_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { @@ -880,7 +889,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder, vlv_dpio_get(dev_priv); /* Assert data lane reset */ - chv_data_lane_soft_reset(encoder, crtc_state, true); + __chv_data_lane_soft_reset(encoder, crtc_state, true); /* program left/right clock distribution */ if (pipe != PIPE_B) { @@ -1008,7 +1017,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, } /* Deassert data lane reset */ - chv_data_lane_soft_reset(encoder, crtc_state, false); + __chv_data_lane_soft_reset(encoder, crtc_state, false); vlv_dpio_put(dev_priv); } diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c b/drivers/gpu/drm/i915/display/intel_dpll.c index 0481b1365b85..a9e9b98d0bf9 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll.c +++ b/drivers/gpu/drm/i915/display/intel_dpll.c @@ -374,12 +374,11 @@ int chv_calc_dpll_params(int refclk, struct dpll *clock) static int i9xx_pll_refclk(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); const struct i9xx_dpll_hw_state *hw_state = &crtc_state->dpll_hw_state.i9xx; if ((hw_state->dpll & PLL_REF_INPUT_MASK) == PLLB_REF_INPUT_SPREADSPECTRUMIN) return display->vbt.lvds_ssc_freq; - else if (HAS_PCH_SPLIT(i915)) + else if (HAS_PCH_SPLIT(display)) return 120000; else if (DISPLAY_VER(display) != 2) return 96000; @@ -1235,12 +1234,10 @@ static int mtl_crtc_compute_clock(struct intel_atomic_state *state, static int ilk_fb_cb_factor(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *i915 = to_i915(crtc->base.dev); if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_LVDS) && ((intel_panel_use_ssc(display) && display->vbt.lvds_ssc_freq == 100000) || - (HAS_PCH_IBX(i915) && intel_is_dual_link_lvds(display)))) + (HAS_PCH_IBX(display) && intel_is_dual_link_lvds(display)))) return 25; if (crtc_state->sdvo_tv_clock) @@ -1791,15 +1788,13 @@ int intel_dpll_crtc_get_shared_dpll(struct intel_atomic_state *state, void intel_dpll_init_clock_hook(struct intel_display *display) { - struct 
drm_i915_private *dev_priv = to_i915(display->drm); - if (DISPLAY_VER(display) >= 14) display->funcs.dpll = &mtl_dpll_funcs; else if (display->platform.dg2) display->funcs.dpll = &dg2_dpll_funcs; else if (DISPLAY_VER(display) >= 9 || HAS_DDI(display)) display->funcs.dpll = &hsw_dpll_funcs; - else if (HAS_PCH_SPLIT(dev_priv)) + else if (HAS_PCH_SPLIT(display)) display->funcs.dpll = &ilk_dpll_funcs; else if (display->platform.cherryview) display->funcs.dpll = &chv_dpll_funcs; diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 84df41086a89..9da051a3f455 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -24,9 +24,11 @@ #include <linux/math.h> #include <linux/string_helpers.h> +#include <drm/drm_print.h> + #include "bxt_dpio_phy_regs.h" -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_cx0_phy.h" #include "intel_de.h" #include "intel_display_types.h" @@ -38,6 +40,7 @@ #include "intel_hti.h" #include "intel_mg_phy_regs.h" #include "intel_pch_refclk.h" +#include "intel_step.h" #include "intel_tc.h" /** @@ -609,13 +612,12 @@ static int ibx_get_dpll(struct intel_atomic_state *state, struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(state); - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); struct intel_shared_dpll *pll; enum intel_dpll_id id; - if (HAS_PCH_IBX(i915)) { + if (HAS_PCH_IBX(display)) { /* Ironlake PCH has a fixed PLL->PCH pipe mapping. */ id = (enum intel_dpll_id) crtc->pipe; pll = intel_get_shared_dpll_by_id(display, id); @@ -4305,7 +4307,6 @@ static const struct intel_dpll_mgr adlp_pll_mgr = { */ void intel_shared_dpll_init(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); const struct intel_dpll_mgr *dpll_mgr = NULL; const struct dpll_info *dpll_info; int i; @@ -4335,7 +4336,7 @@ void intel_shared_dpll_init(struct intel_display *display) dpll_mgr = &skl_pll_mgr; else if (HAS_DDI(display)) dpll_mgr = &hsw_pll_mgr; - else if (HAS_PCH_IBX(i915) || HAS_PCH_CPT(i915)) + else if (HAS_PCH_IBX(display) || HAS_PCH_CPT(display)) dpll_mgr = &pch_pll_mgr; if (!dpll_mgr) diff --git a/drivers/gpu/drm/i915/display/intel_dpt_common.c b/drivers/gpu/drm/i915/display/intel_dpt_common.c index d2dede0a5229..ce5aa0ca0fa5 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt_common.c +++ b/drivers/gpu/drm/i915/display/intel_dpt_common.c @@ -3,7 +3,6 @@ * Copyright © 2023 Intel Corporation */ -#include "i915_drv.h" #include "i915_reg.h" #include "intel_de.h" #include "intel_display_types.h" @@ -12,9 +11,9 @@ void intel_dpt_configure(struct intel_crtc *crtc) { - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc); - if (DISPLAY_VER(i915) == 14) { + if (DISPLAY_VER(display) == 14) { enum pipe pipe = crtc->pipe; enum plane_id plane_id; @@ -22,15 +21,15 @@ void intel_dpt_configure(struct intel_crtc *crtc) if (plane_id == PLANE_CURSOR) continue; - intel_de_rmw(i915, PLANE_CHICKEN(pipe, plane_id), + intel_de_rmw(display, PLANE_CHICKEN(pipe, plane_id), PLANE_CHICKEN_DISABLE_DPT, - i915->display.params.enable_dpt ? 0 : + display->params.enable_dpt ? 
0 : PLANE_CHICKEN_DISABLE_DPT); } - } else if (DISPLAY_VER(i915) == 13) { - intel_de_rmw(i915, CHICKEN_MISC_2, + } else if (DISPLAY_VER(display) == 13) { + intel_de_rmw(display, CHICKEN_MISC_2, CHICKEN_MISC_DISABLE_DPT, - i915->display.params.enable_dpt ? 0 : + display->params.enable_dpt ? 0 : CHICKEN_MISC_DISABLE_DPT); } } diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 72fe390c5af2..481488d1fe67 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -4,11 +4,12 @@ * */ +#include <drm/drm_print.h> #include <drm/drm_vblank.h> -#include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_crtc.h" #include "intel_de.h" #include "intel_display_rpm.h" diff --git a/drivers/gpu/drm/i915/display/intel_dsi.c b/drivers/gpu/drm/i915/display/intel_dsi.c index 403151175a87..a8f012119165 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi.c +++ b/drivers/gpu/drm/i915/display/intel_dsi.c @@ -4,8 +4,9 @@ */ #include <drm/drm_mipi_dsi.h> +#include <drm/drm_print.h> -#include "i915_drv.h" +#include "intel_display_core.h" #include "intel_dsi.h" #include "intel_panel.h" @@ -116,14 +117,14 @@ struct intel_dsi_host *intel_dsi_host_init(struct intel_dsi *intel_dsi, enum drm_panel_orientation intel_dsi_get_panel_orientation(struct intel_connector *connector) { - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); enum drm_panel_orientation orientation; orientation = connector->panel.vbt.dsi.orientation; if (orientation != DRM_MODE_PANEL_ORIENTATION_UNKNOWN) return orientation; - orientation = dev_priv->display.vbt.orientation; + orientation = display->vbt.orientation; if (orientation != DRM_MODE_PANEL_ORIENTATION_UNKNOWN) return orientation; diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index 4e92504f5c14..29c920983413 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -31,16 +31,16 @@ #include <linux/pinctrl/machine.h> #include <linux/slab.h> #include <linux/string_helpers.h> - #include <linux/unaligned.h> #include <drm/drm_crtc.h> #include <drm/drm_edid.h> +#include <drm/drm_print.h> #include <video/mipi_display.h> -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_dsi.h" @@ -321,7 +321,6 @@ enum { static void icl_native_gpio_set_value(struct intel_display *display, int gpio, bool value) { - struct drm_i915_private *dev_priv = to_i915(display->drm); int index; if (drm_WARN_ON(display->drm, DISPLAY_VER(display) == 11 && gpio >= MIPI_RESET_2)) @@ -341,12 +340,12 @@ static void icl_native_gpio_set_value(struct intel_display *display, * The locking protects against concurrent SHOTPLUG_CTL_DDI * modifications in irq setup and handling. */ - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); intel_de_rmw(display, SHOTPLUG_CTL_DDI, SHOTPLUG_CTL_DDI_HPD_ENABLE(index) | SHOTPLUG_CTL_DDI_HPD_OUTPUT_DATA(index), value ? 
SHOTPLUG_CTL_DDI_HPD_OUTPUT_DATA(index) : 0); - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); break; case MIPI_AVDD_EN_1: case MIPI_AVDD_EN_2: diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 2b0e0f220442..05393bd60c98 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -560,11 +560,11 @@ static bool plane_has_modifier(struct intel_display *display, return false; if (md->modifier == I915_FORMAT_MOD_4_TILED_BMG_CCS && - (GRAPHICS_VER(i915) < 20 || !IS_DGFX(i915))) + (GRAPHICS_VER(i915) < 20 || !display->platform.dgfx)) return false; if (md->modifier == I915_FORMAT_MOD_4_TILED_LNL_CCS && - (GRAPHICS_VER(i915) < 20 || IS_DGFX(i915))) + (GRAPHICS_VER(i915) < 20 || display->platform.dgfx)) return false; return true; diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index ce5b1e3f1c20..bed2bba20b55 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -252,9 +252,12 @@ static u16 intel_fbc_override_cfb_stride(const struct intel_plane_state *plane_s * Gen9 hw miscalculates cfb stride for linear as * PLANE_STRIDE*512 instead of PLANE_STRIDE*64, so * we always need to use the override there. + * + * wa_14022269668 For bmg, always program the FBC_STRIDE before fbc enable */ if (stride != stride_aligned || - (DISPLAY_VER(display) == 9 && fb->modifier == DRM_FORMAT_MOD_LINEAR)) + (DISPLAY_VER(display) == 9 && fb->modifier == DRM_FORMAT_MOD_LINEAR) || + display->platform.battlemage) return stride_aligned * 4 / 64; return 0; diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 369f46286e95..2dc4029d71ed 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -47,9 +47,9 @@ #include <drm/drm_managed.h> #include <drm/drm_print.h> -#include "i915_drv.h" #include "i915_vma.h" #include "intel_bo.h" +#include "intel_display_core.h" #include "intel_display_rpm.h" #include "intel_display_types.h" #include "intel_fb.h" @@ -66,9 +66,9 @@ struct intel_fbdev { static struct intel_fbdev *to_intel_fbdev(struct drm_fb_helper *fb_helper) { - struct drm_i915_private *i915 = to_i915(fb_helper->client.dev); + struct intel_display *display = to_intel_display(fb_helper->client.dev); - return i915->display.fbdev.fbdev; + return display->fbdev.fbdev; } static struct intel_frontbuffer *to_frontbuffer(struct intel_fbdev *ifbdev) @@ -210,11 +210,9 @@ static const struct drm_fb_helper_funcs intel_fb_helper_funcs = { int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes) { + struct intel_display *display = to_intel_display(helper->dev); struct intel_fbdev *ifbdev = to_intel_fbdev(helper); struct intel_framebuffer *fb = ifbdev->fb; - struct drm_device *dev = helper->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_display *display = to_intel_display(dev); struct ref_tracker *wakeref; struct fb_info *info; struct i915_vma *vma; @@ -228,7 +226,7 @@ int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, if (fb && (sizes->fb_width > fb->base.width || sizes->fb_height > fb->base.height)) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "BIOS fb too small (%dx%d), we require (%dx%d)," " releasing it\n", fb->base.width, fb->base.height, @@ -236,14 +234,14 @@ int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper 
*helper, drm_framebuffer_put(&fb->base); fb = NULL; } - if (!fb || drm_WARN_ON(dev, !intel_fb_bo(&fb->base))) { - drm_dbg_kms(&dev_priv->drm, + if (!fb || drm_WARN_ON(display->drm, !intel_fb_bo(&fb->base))) { + drm_dbg_kms(display->drm, "no BIOS fb, allocating a new one\n"); fb = intel_fbdev_fb_alloc(helper, sizes); if (IS_ERR(fb)) return PTR_ERR(fb); } else { - drm_dbg_kms(&dev_priv->drm, "re-using BIOS fb\n"); + drm_dbg_kms(display->drm, "re-using BIOS fb\n"); prealloc = true; sizes->fb_width = fb->base.width; sizes->fb_height = fb->base.height; @@ -267,7 +265,7 @@ int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, info = drm_fb_helper_alloc_info(helper); if (IS_ERR(info)) { - drm_err(&dev_priv->drm, "Failed to allocate fb_info (%pe)\n", info); + drm_err(display->drm, "Failed to allocate fb_info (%pe)\n", info); ret = PTR_ERR(info); goto out_unpin; } @@ -279,11 +277,11 @@ int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, obj = intel_fb_bo(&fb->base); - ret = intel_fbdev_fb_fill_info(dev_priv, info, obj, vma); + ret = intel_fbdev_fb_fill_info(display, info, obj, vma); if (ret) goto out_unpin; - drm_fb_helper_fill_info(info, dev->fb_helper, sizes); + drm_fb_helper_fill_info(info, display->drm->fb_helper, sizes); /* If the object is shmemfs backed, it will have given us zeroed pages. * If the object is stolen however, it will be full of whatever @@ -294,7 +292,7 @@ int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ - drm_dbg_kms(&dev_priv->drm, "allocated %dx%d fb: 0x%08x\n", + drm_dbg_kms(display->drm, "allocated %dx%d fb: 0x%08x\n", fb->base.width, fb->base.height, i915_ggtt_offset(vma)); ifbdev->fb = fb; @@ -322,16 +320,15 @@ out_unlock: * Note we only support a single fb shared across pipes for boot (mostly for * fbcon), so we just find the biggest and use that. 
*/ -static bool intel_fbdev_init_bios(struct drm_device *dev, +static bool intel_fbdev_init_bios(struct intel_display *display, struct intel_fbdev *ifbdev) { - struct drm_i915_private *i915 = to_i915(dev); struct intel_framebuffer *fb = NULL; struct intel_crtc *crtc; unsigned int max_size = 0; /* Find the largest fb */ - for_each_intel_crtc(dev, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct intel_plane *plane = @@ -341,21 +338,21 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, struct drm_gem_object *obj = intel_fb_bo(plane_state->uapi.fb); if (!crtc_state->uapi.active) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[CRTC:%d:%s] not active, skipping\n", crtc->base.base.id, crtc->base.name); continue; } if (!obj) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[PLANE:%d:%s] no fb, skipping\n", plane->base.base.id, plane->base.name); continue; } if (obj->size > max_size) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "found possible fb from [PLANE:%d:%s]\n", plane->base.base.id, plane->base.name); fb = to_intel_framebuffer(plane_state->uapi.fb); @@ -364,13 +361,13 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, } if (!fb) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "no active fbs found, not using BIOS config\n"); goto out; } /* Now make sure all the pipes will fit into it */ - for_each_intel_crtc(dev, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct intel_plane *plane = @@ -378,13 +375,13 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, unsigned int cur_size; if (!crtc_state->uapi.active) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[CRTC:%d:%s] not active, skipping\n", crtc->base.base.id, crtc->base.name); continue; } - drm_dbg_kms(&i915->drm, "checking [PLANE:%d:%s] for BIOS fb\n", + drm_dbg_kms(display->drm, "checking [PLANE:%d:%s] for BIOS fb\n", plane->base.base.id, plane->base.name); /* @@ -395,7 +392,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, cur_size = crtc_state->uapi.adjusted_mode.crtc_hdisplay; cur_size = cur_size * fb->base.format->cpp[0]; if (fb->base.pitches[0] < cur_size) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "fb not wide enough for [PLANE:%d:%s] (%d vs %d)\n", plane->base.base.id, plane->base.name, cur_size, fb->base.pitches[0]); @@ -406,7 +403,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, cur_size = crtc_state->uapi.adjusted_mode.crtc_vdisplay; cur_size = intel_fb_align_height(&fb->base, 0, cur_size); cur_size *= fb->base.pitches[0]; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[CRTC:%d:%s] area: %dx%d, bpp: %d, size: %d\n", crtc->base.base.id, crtc->base.name, crtc_state->uapi.adjusted_mode.crtc_hdisplay, @@ -415,7 +412,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, cur_size); if (cur_size > max_size) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "fb not big enough for [PLANE:%d:%s] (%d vs %d)\n", plane->base.base.id, plane->base.name, cur_size, max_size); @@ -423,14 +420,14 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, break; } - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "fb big enough [PLANE:%d:%s] (%d >= %d)\n", plane->base.base.id, plane->base.name, max_size, cur_size); } if (!fb) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "BIOS fb not suitable for all pipes, not using\n"); goto 
out; } @@ -440,7 +437,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, drm_framebuffer_get(&ifbdev->fb->base); /* Final pass to check if any active pipes don't have fbs */ - for_each_intel_crtc(dev, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct intel_plane *plane = @@ -451,13 +448,13 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, if (!crtc_state->uapi.active) continue; - drm_WARN(dev, !plane_state->uapi.fb, + drm_WARN(display->drm, !plane_state->uapi.fb, "re-used BIOS config but lost an fb on [PLANE:%d:%s]\n", plane->base.base.id, plane->base.name); } - drm_dbg_kms(&i915->drm, "using BIOS fb for initial console\n"); + drm_dbg_kms(display->drm, "using BIOS fb for initial console\n"); return true; out: @@ -482,26 +479,25 @@ static unsigned int intel_fbdev_color_mode(const struct drm_format_info *info) } } -void intel_fbdev_setup(struct drm_i915_private *i915) +void intel_fbdev_setup(struct intel_display *display) { - struct drm_device *dev = &i915->drm; struct intel_fbdev *ifbdev; unsigned int preferred_bpp = 0; - if (!HAS_DISPLAY(i915)) + if (!HAS_DISPLAY(display)) return; - ifbdev = drmm_kzalloc(dev, sizeof(*ifbdev), GFP_KERNEL); + ifbdev = drmm_kzalloc(display->drm, sizeof(*ifbdev), GFP_KERNEL); if (!ifbdev) return; - i915->display.fbdev.fbdev = ifbdev; - if (intel_fbdev_init_bios(dev, ifbdev)) + display->fbdev.fbdev = ifbdev; + if (intel_fbdev_init_bios(display, ifbdev)) preferred_bpp = intel_fbdev_color_mode(ifbdev->fb->base.format); if (!preferred_bpp) preferred_bpp = 32; - drm_client_setup_with_color_mode(dev, preferred_bpp); + drm_client_setup_with_color_mode(display->drm, preferred_bpp); } struct intel_framebuffer *intel_fbdev_framebuffer(struct intel_fbdev *fbdev) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.h b/drivers/gpu/drm/i915/display/intel_fbdev.h index 89bad3a2b01a..a15e3e222a0c 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.h +++ b/drivers/gpu/drm/i915/display/intel_fbdev.h @@ -10,7 +10,7 @@ struct drm_fb_helper; struct drm_fb_helper_surface_size; -struct drm_i915_private; +struct intel_display; struct intel_fbdev; struct intel_framebuffer; @@ -19,14 +19,14 @@ int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes); #define INTEL_FBDEV_DRIVER_OPS \ .fbdev_probe = intel_fbdev_driver_fbdev_probe -void intel_fbdev_setup(struct drm_i915_private *dev_priv); +void intel_fbdev_setup(struct intel_display *display); struct intel_framebuffer *intel_fbdev_framebuffer(struct intel_fbdev *fbdev); struct i915_vma *intel_fbdev_vma_pointer(struct intel_fbdev *fbdev); #else #define INTEL_FBDEV_DRIVER_OPS \ .fbdev_probe = NULL -static inline void intel_fbdev_setup(struct drm_i915_private *dev_priv) +static inline void intel_fbdev_setup(struct intel_display *display) { } static inline struct intel_framebuffer *intel_fbdev_framebuffer(struct intel_fbdev *fbdev) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c index 4991c35a2632..5f4cb3328265 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c @@ -15,9 +15,9 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes) { + struct intel_display *display = to_intel_display(helper->dev); + struct drm_i915_private *dev_priv = to_i915(display->drm); struct drm_framebuffer *fb; - struct 
drm_device *dev = helper->dev; - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_mode_fb_cmd2 mode_cmd = {}; struct drm_i915_gem_object *obj; int size; @@ -50,14 +50,14 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, * * Also skip stolen on MTL as Wa_22018444074 mitigation. */ - if (!(IS_METEORLAKE(dev_priv)) && size * 2 < dev_priv->dsm.usable_size) + if (!display->platform.meteorlake && size * 2 < dev_priv->dsm.usable_size) obj = i915_gem_object_create_stolen(dev_priv, size); if (IS_ERR(obj)) obj = i915_gem_object_create_shmem(dev_priv, size); } if (IS_ERR(obj)) { - drm_err(&dev_priv->drm, "failed to allocate framebuffer (%pe)\n", obj); + drm_err(display->drm, "failed to allocate framebuffer (%pe)\n", obj); return ERR_PTR(-ENOMEM); } @@ -67,9 +67,10 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, return to_intel_framebuffer(fb); } -int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info, +int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, struct drm_gem_object *_obj, struct i915_vma *vma) { + struct drm_i915_private *i915 = to_i915(display->drm); struct drm_i915_gem_object *obj = to_intel_bo(_obj); struct i915_gem_ww_ctx ww; void __iomem *vaddr; @@ -101,7 +102,7 @@ int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info vaddr = i915_vma_pin_iomap(vma); if (IS_ERR(vaddr)) { - drm_err(&i915->drm, + drm_err(display->drm, "Failed to remap framebuffer into virtual memory (%pe)\n", vaddr); ret = PTR_ERR(vaddr); continue; diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h index e502ae375fc0..cb7957272715 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.h @@ -9,13 +9,13 @@ struct drm_fb_helper; struct drm_fb_helper_surface_size; struct drm_gem_object; -struct drm_i915_private; struct fb_info; struct i915_vma; +struct intel_display; struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes); -int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info, +int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, struct drm_gem_object *obj, struct i915_vma *vma); #endif diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c b/drivers/gpu/drm/i915/display/intel_fdi.c index 40deee0769ae..169bbe154b5c 100644 --- a/drivers/gpu/drm/i915/display/intel_fdi.c +++ b/drivers/gpu/drm/i915/display/intel_fdi.c @@ -6,15 +6,16 @@ #include <linux/string_helpers.h> #include <drm/drm_fixed.h> +#include <drm/drm_print.h> -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_atomic.h" #include "intel_crtc.h" #include "intel_ddi.h" #include "intel_de.h" -#include "intel_dp.h" #include "intel_display_types.h" +#include "intel_dp.h" #include "intel_fdi.h" #include "intel_fdi_regs.h" #include "intel_link_bw.h" @@ -464,7 +465,6 @@ static void ivb_update_fdi_bc_bifurcation(const struct intel_crtc_state *crtc_st void intel_fdi_normal_train(struct intel_crtc *crtc) { struct intel_display *display = to_intel_display(crtc); - struct drm_i915_private *dev_priv = to_i915(display->drm); enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp; @@ -483,7 +483,7 @@ void intel_fdi_normal_train(struct intel_crtc *crtc) reg = FDI_RX_CTL(pipe); temp = intel_de_read(display, reg); - if (HAS_PCH_CPT(dev_priv)) { + if (HAS_PCH_CPT(display)) { 
temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; temp |= FDI_LINK_TRAIN_NORMAL_CPT; } else { @@ -607,7 +607,6 @@ static void gen6_fdi_link_train(struct intel_crtc *crtc, const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc); - struct drm_i915_private *dev_priv = to_i915(display->drm); enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp, i, retry; @@ -647,7 +646,7 @@ static void gen6_fdi_link_train(struct intel_crtc *crtc, reg = FDI_RX_CTL(pipe); temp = intel_de_read(display, reg); - if (HAS_PCH_CPT(dev_priv)) { + if (HAS_PCH_CPT(display)) { temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; temp |= FDI_LINK_TRAIN_PATTERN_1_CPT; } else { @@ -698,7 +697,7 @@ static void gen6_fdi_link_train(struct intel_crtc *crtc, reg = FDI_RX_CTL(pipe); temp = intel_de_read(display, reg); - if (HAS_PCH_CPT(dev_priv)) { + if (HAS_PCH_CPT(display)) { temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; temp |= FDI_LINK_TRAIN_PATTERN_2_CPT; } else { @@ -1077,7 +1076,6 @@ void ilk_fdi_pll_disable(struct intel_crtc *crtc) void ilk_fdi_disable(struct intel_crtc *crtc) { struct intel_display *display = to_intel_display(crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp; @@ -1096,7 +1094,7 @@ void ilk_fdi_disable(struct intel_crtc *crtc) udelay(100); /* Ironlake workaround, disable clock pointer after downing FDI */ - if (HAS_PCH_IBX(dev_priv)) + if (HAS_PCH_IBX(display)) intel_de_write(display, FDI_RX_CHICKEN(pipe), FDI_RX_PHASE_SYNC_POINTER_OVR); @@ -1106,7 +1104,7 @@ void ilk_fdi_disable(struct intel_crtc *crtc) reg = FDI_RX_CTL(pipe); temp = intel_de_read(display, reg); - if (HAS_PCH_CPT(dev_priv)) { + if (HAS_PCH_CPT(display)) { temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT; temp |= FDI_LINK_TRAIN_PATTERN_1_CPT; } else { diff --git a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c index 451cd26024f7..2a787897b2d3 100644 --- a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c @@ -25,7 +25,8 @@ * */ -#include "i915_drv.h" +#include <drm/drm_print.h> + #include "i915_reg.h" #include "intel_de.h" #include "intel_display_irq.h" @@ -57,11 +58,10 @@ static bool ivb_can_enable_err_int(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_crtc *crtc; enum pipe pipe; - lockdep_assert_held(&dev_priv->irq_lock); + lockdep_assert_held(&display->irq.lock); for_each_pipe(display, pipe) { crtc = intel_crtc_for_pipe(display, pipe); @@ -75,11 +75,10 @@ static bool ivb_can_enable_err_int(struct intel_display *display) static bool cpt_can_enable_serr_int(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); enum pipe pipe; struct intel_crtc *crtc; - lockdep_assert_held(&dev_priv->irq_lock); + lockdep_assert_held(&display->irq.lock); for_each_pipe(display, pipe) { crtc = intel_crtc_for_pipe(display, pipe); @@ -94,11 +93,10 @@ static bool cpt_can_enable_serr_int(struct intel_display *display) static void i9xx_check_fifo_underruns(struct intel_crtc *crtc) { struct intel_display *display = to_intel_display(crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); i915_reg_t reg = PIPESTAT(display, crtc->pipe); u32 enable_mask; - lockdep_assert_held(&dev_priv->irq_lock); + lockdep_assert_held(&display->irq.lock); if ((intel_de_read(display, reg) & PIPE_FIFO_UNDERRUN_STATUS) == 0) return; @@ -115,10 +113,9 @@ static void 
i9xx_set_fifo_underrun_reporting(struct intel_display *display, enum pipe pipe, bool enable, bool old) { - struct drm_i915_private *dev_priv = to_i915(display->drm); i915_reg_t reg = PIPESTAT(display, pipe); - lockdep_assert_held(&dev_priv->irq_lock); + lockdep_assert_held(&display->irq.lock); if (enable) { u32 enable_mask = i915_pipestat_enable_mask(display, pipe); @@ -148,11 +145,10 @@ static void ilk_set_fifo_underrun_reporting(struct intel_display *display, static void ivb_check_fifo_underruns(struct intel_crtc *crtc) { struct intel_display *display = to_intel_display(crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; u32 err_int = intel_de_read(display, GEN7_ERR_INT); - lockdep_assert_held(&dev_priv->irq_lock); + lockdep_assert_held(&display->irq.lock); if ((err_int & ERR_INT_FIFO_UNDERRUN(pipe)) == 0) return; @@ -213,11 +209,10 @@ static void ibx_set_fifo_underrun_reporting(struct intel_display *display, static void cpt_check_pch_fifo_underruns(struct intel_crtc *crtc) { struct intel_display *display = to_intel_display(crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pch_transcoder = crtc->pipe; u32 serr_int = intel_de_read(display, SERR_INT); - lockdep_assert_held(&dev_priv->irq_lock); + lockdep_assert_held(&display->irq.lock); if ((serr_int & SERR_INT_TRANS_FIFO_UNDERRUN(pch_transcoder)) == 0) return; @@ -258,11 +253,10 @@ static void cpt_set_fifo_underrun_reporting(struct intel_display *display, static bool __intel_set_cpu_fifo_underrun_reporting(struct intel_display *display, enum pipe pipe, bool enable) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_crtc *crtc = intel_crtc_for_pipe(display, pipe); bool old; - lockdep_assert_held(&dev_priv->irq_lock); + lockdep_assert_held(&display->irq.lock); old = !crtc->cpu_fifo_underrun_disabled; crtc->cpu_fifo_underrun_disabled = !enable; @@ -298,13 +292,12 @@ static bool __intel_set_cpu_fifo_underrun_reporting(struct intel_display *displa bool intel_set_cpu_fifo_underrun_reporting(struct intel_display *display, enum pipe pipe, bool enable) { - struct drm_i915_private *dev_priv = to_i915(display->drm); unsigned long flags; bool ret; - spin_lock_irqsave(&dev_priv->irq_lock, flags); + spin_lock_irqsave(&display->irq.lock, flags); ret = __intel_set_cpu_fifo_underrun_reporting(display, pipe, enable); - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + spin_unlock_irqrestore(&display->irq.lock, flags); return ret; } @@ -327,7 +320,6 @@ bool intel_set_pch_fifo_underrun_reporting(struct intel_display *display, enum pipe pch_transcoder, bool enable) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_crtc *crtc = intel_crtc_for_pipe(display, pch_transcoder); unsigned long flags; bool old; @@ -341,12 +333,12 @@ bool intel_set_pch_fifo_underrun_reporting(struct intel_display *display, * crtc on LPT won't cause issues. 
*/ - spin_lock_irqsave(&dev_priv->irq_lock, flags); + spin_lock_irqsave(&display->irq.lock, flags); old = !crtc->pch_fifo_underrun_disabled; crtc->pch_fifo_underrun_disabled = !enable; - if (HAS_PCH_IBX(dev_priv)) + if (HAS_PCH_IBX(display)) ibx_set_fifo_underrun_reporting(display, pch_transcoder, enable); @@ -355,7 +347,7 @@ bool intel_set_pch_fifo_underrun_reporting(struct intel_display *display, pch_transcoder, enable, old); - spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + spin_unlock_irqrestore(&display->irq.lock, flags); return old; } @@ -422,10 +414,9 @@ void intel_pch_fifo_underrun_irq_handler(struct intel_display *display, */ void intel_check_cpu_fifo_underruns(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_crtc *crtc; - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); for_each_intel_crtc(display->drm, crtc) { if (crtc->cpu_fifo_underrun_disabled) @@ -437,7 +428,7 @@ void intel_check_cpu_fifo_underruns(struct intel_display *display) ivb_check_fifo_underruns(crtc); } - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); } /** @@ -450,20 +441,19 @@ void intel_check_cpu_fifo_underruns(struct intel_display *display) */ void intel_check_pch_fifo_underruns(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_crtc *crtc; - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); for_each_intel_crtc(display->drm, crtc) { if (crtc->pch_fifo_underrun_disabled) continue; - if (HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_CPT(display)) cpt_check_pch_fifo_underruns(crtc); } - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); } void intel_init_fifo_underrun_reporting(struct intel_display *display, diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index ba2f88ca6117..43be5377ddc1 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -58,7 +58,6 @@ #include <drm/drm_gem.h> #include "i915_active.h" -#include "i915_drv.h" #include "i915_vma.h" #include "intel_bo.h" #include "intel_display_trace.h" @@ -72,7 +71,7 @@ /** * frontbuffer_flush - flush frontbuffer - * @i915: i915 device + * @display: display device * @frontbuffer_bits: frontbuffer plane tracking bits * @origin: which operation caused the flush * @@ -82,16 +81,14 @@ * * Can be called without any locks held. */ -static void frontbuffer_flush(struct drm_i915_private *i915, +static void frontbuffer_flush(struct intel_display *display, unsigned int frontbuffer_bits, enum fb_op_origin origin) { - struct intel_display *display = &i915->display; - /* Delay flushing when rings are still busy.*/ - spin_lock(&i915->display.fb_tracking.lock); - frontbuffer_bits &= ~i915->display.fb_tracking.busy_bits; - spin_unlock(&i915->display.fb_tracking.lock); + spin_lock(&display->fb_tracking.lock); + frontbuffer_bits &= ~display->fb_tracking.busy_bits; + spin_unlock(&display->fb_tracking.lock); if (!frontbuffer_bits) return; @@ -107,7 +104,7 @@ static void frontbuffer_flush(struct drm_i915_private *i915, /** * intel_frontbuffer_flip_prepare - prepare asynchronous frontbuffer flip - * @i915: i915 device + * @display: display device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called after scheduling a flip on @obj. 
The actual @@ -117,19 +114,19 @@ static void frontbuffer_flush(struct drm_i915_private *i915, * * Can be called without any locks held. */ -void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915, +void intel_frontbuffer_flip_prepare(struct intel_display *display, unsigned frontbuffer_bits) { - spin_lock(&i915->display.fb_tracking.lock); - i915->display.fb_tracking.flip_bits |= frontbuffer_bits; + spin_lock(&display->fb_tracking.lock); + display->fb_tracking.flip_bits |= frontbuffer_bits; /* Remove stale busy bits due to the old buffer. */ - i915->display.fb_tracking.busy_bits &= ~frontbuffer_bits; - spin_unlock(&i915->display.fb_tracking.lock); + display->fb_tracking.busy_bits &= ~frontbuffer_bits; + spin_unlock(&display->fb_tracking.lock); } /** * intel_frontbuffer_flip_complete - complete asynchronous frontbuffer flip - * @i915: i915 device + * @display: display device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called after the flip has been latched and will complete @@ -137,22 +134,22 @@ void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915, * * Can be called without any locks held. */ -void intel_frontbuffer_flip_complete(struct drm_i915_private *i915, +void intel_frontbuffer_flip_complete(struct intel_display *display, unsigned frontbuffer_bits) { - spin_lock(&i915->display.fb_tracking.lock); + spin_lock(&display->fb_tracking.lock); /* Mask any cancelled flips. */ - frontbuffer_bits &= i915->display.fb_tracking.flip_bits; - i915->display.fb_tracking.flip_bits &= ~frontbuffer_bits; - spin_unlock(&i915->display.fb_tracking.lock); + frontbuffer_bits &= display->fb_tracking.flip_bits; + display->fb_tracking.flip_bits &= ~frontbuffer_bits; + spin_unlock(&display->fb_tracking.lock); if (frontbuffer_bits) - frontbuffer_flush(i915, frontbuffer_bits, ORIGIN_FLIP); + frontbuffer_flush(display, frontbuffer_bits, ORIGIN_FLIP); } /** * intel_frontbuffer_flip - synchronous frontbuffer flip - * @i915: i915 device + * @display: display device * @frontbuffer_bits: frontbuffer plane tracking bits * * This function gets called after scheduling a flip on @obj. This is for @@ -161,15 +158,15 @@ void intel_frontbuffer_flip_complete(struct drm_i915_private *i915, * * Can be called without any locks held. */ -void intel_frontbuffer_flip(struct drm_i915_private *i915, +void intel_frontbuffer_flip(struct intel_display *display, unsigned frontbuffer_bits) { - spin_lock(&i915->display.fb_tracking.lock); + spin_lock(&display->fb_tracking.lock); /* Remove stale busy bits due to the old buffer. 
*/ - i915->display.fb_tracking.busy_bits &= ~frontbuffer_bits; - spin_unlock(&i915->display.fb_tracking.lock); + display->fb_tracking.busy_bits &= ~frontbuffer_bits; + spin_unlock(&display->fb_tracking.lock); - frontbuffer_flush(i915, frontbuffer_bits, ORIGIN_FLIP); + frontbuffer_flush(display, frontbuffer_bits, ORIGIN_FLIP); } void __intel_fb_invalidate(struct intel_frontbuffer *front, @@ -198,7 +195,6 @@ void __intel_fb_flush(struct intel_frontbuffer *front, unsigned int frontbuffer_bits) { struct intel_display *display = to_intel_display(front->obj->dev); - struct drm_i915_private *i915 = to_i915(display->drm); if (origin == ORIGIN_CS) { spin_lock(&display->fb_tracking.lock); @@ -209,7 +205,7 @@ void __intel_fb_flush(struct intel_frontbuffer *front, } if (frontbuffer_bits) - frontbuffer_flush(i915, frontbuffer_bits, origin); + frontbuffer_flush(display, frontbuffer_bits, origin); } static void intel_frontbuffer_flush_work(struct work_struct *work) @@ -280,7 +276,7 @@ static void frontbuffer_release(struct kref *ref) struct intel_frontbuffer * intel_frontbuffer_get(struct drm_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->dev); + struct intel_display *display = to_intel_display(obj->dev); struct intel_frontbuffer *front, *cur; front = intel_bo_get_frontbuffer(obj); @@ -300,9 +296,9 @@ intel_frontbuffer_get(struct drm_gem_object *obj) I915_ACTIVE_RETIRE_SLEEPS); INIT_WORK(&front->flush_work, intel_frontbuffer_flush_work); - spin_lock(&i915->display.fb_tracking.lock); + spin_lock(&display->fb_tracking.lock); cur = intel_bo_set_frontbuffer(obj, front); - spin_unlock(&i915->display.fb_tracking.lock); + spin_unlock(&display->fb_tracking.lock); if (cur != front) kfree(front); return cur; diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.h b/drivers/gpu/drm/i915/display/intel_frontbuffer.h index 6237780a9f68..2fee12eaf9b6 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.h +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.h @@ -31,7 +31,7 @@ #include "i915_active_types.h" struct drm_gem_object; -struct drm_i915_private; +struct intel_display; enum fb_op_origin { ORIGIN_CPU = 0, @@ -68,11 +68,11 @@ struct intel_frontbuffer { GENMASK(INTEL_FRONTBUFFER_BITS_PER_PIPE * ((pipe) + 1) - 1, \ INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)) -void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915, +void intel_frontbuffer_flip_prepare(struct intel_display *display, unsigned frontbuffer_bits); -void intel_frontbuffer_flip_complete(struct drm_i915_private *i915, +void intel_frontbuffer_flip_complete(struct intel_display *display, unsigned frontbuffer_bits); -void intel_frontbuffer_flip(struct drm_i915_private *i915, +void intel_frontbuffer_flip(struct intel_display *display, unsigned frontbuffer_bits); void intel_frontbuffer_put(struct intel_frontbuffer *front); diff --git a/drivers/gpu/drm/i915/display/intel_global_state.c b/drivers/gpu/drm/i915/display/intel_global_state.c index 8a49e2bb37fa..000a898c9480 100644 --- a/drivers/gpu/drm/i915/display/intel_global_state.c +++ b/drivers/gpu/drm/i915/display/intel_global_state.c @@ -3,10 +3,13 @@ * Copyright © 2020 Intel Corporation */ +#include <linux/pci.h> #include <linux/string.h> -#include "i915_drv.h" +#include <drm/drm_print.h> + #include "intel_atomic.h" +#include "intel_display_core.h" #include "intel_display_types.h" #include "intel_global_state.h" diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index abf457e68ee9..d55cc77650b7 100644 --- 
a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -152,32 +152,31 @@ static const struct gmbus_pin gmbus_pins_mtp[] = { static const struct gmbus_pin *get_gmbus_pin(struct intel_display *display, unsigned int pin) { - struct drm_i915_private *i915 = to_i915(display->drm); const struct gmbus_pin *pins; size_t size; - if (INTEL_PCH_TYPE(i915) >= PCH_MTL) { + if (INTEL_PCH_TYPE(display) >= PCH_MTL) { pins = gmbus_pins_mtp; size = ARRAY_SIZE(gmbus_pins_mtp); - } else if (INTEL_PCH_TYPE(i915) >= PCH_DG2) { + } else if (INTEL_PCH_TYPE(display) >= PCH_DG2) { pins = gmbus_pins_dg2; size = ARRAY_SIZE(gmbus_pins_dg2); - } else if (INTEL_PCH_TYPE(i915) >= PCH_DG1) { + } else if (INTEL_PCH_TYPE(display) >= PCH_DG1) { pins = gmbus_pins_dg1; size = ARRAY_SIZE(gmbus_pins_dg1); - } else if (INTEL_PCH_TYPE(i915) >= PCH_ICP) { + } else if (INTEL_PCH_TYPE(display) >= PCH_ICP) { pins = gmbus_pins_icp; size = ARRAY_SIZE(gmbus_pins_icp); - } else if (HAS_PCH_CNP(i915)) { + } else if (HAS_PCH_CNP(display)) { pins = gmbus_pins_cnp; size = ARRAY_SIZE(gmbus_pins_cnp); - } else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) { + } else if (display->platform.geminilake || display->platform.broxton) { pins = gmbus_pins_bxt; size = ARRAY_SIZE(gmbus_pins_bxt); } else if (DISPLAY_VER(display) == 9) { pins = gmbus_pins_skl; size = ARRAY_SIZE(gmbus_pins_skl); - } else if (IS_BROADWELL(i915)) { + } else if (display->platform.broadwell) { pins = gmbus_pins_bdw; size = ARRAY_SIZE(gmbus_pins_bdw); } else { @@ -240,11 +239,10 @@ static void bxt_gmbus_clock_gating(struct intel_display *display, static u32 get_reserved(struct intel_gmbus *bus) { struct intel_display *display = bus->display; - struct drm_i915_private *i915 = to_i915(display->drm); u32 reserved = 0; /* On most chips, these bits must be preserved in software. 
*/ - if (!IS_I830(i915) && !IS_I845G(i915)) + if (!display->platform.i830 && !display->platform.i845g) reserved = intel_de_read_notrace(display, bus->gpio_reg) & (GPIO_DATA_PULLUP_DISABLE | GPIO_CLOCK_PULLUP_DISABLE); @@ -314,11 +312,10 @@ intel_gpio_pre_xfer(struct i2c_adapter *adapter) { struct intel_gmbus *bus = to_intel_gmbus(adapter); struct intel_display *display = bus->display; - struct drm_i915_private *i915 = to_i915(display->drm); intel_gmbus_reset(display); - if (IS_PINEVIEW(i915)) + if (display->platform.pineview) pnv_gmbus_clock_gating(display, false); set_data(bus, 1); @@ -332,12 +329,11 @@ intel_gpio_post_xfer(struct i2c_adapter *adapter) { struct intel_gmbus *bus = to_intel_gmbus(adapter); struct intel_display *display = bus->display; - struct drm_i915_private *i915 = to_i915(display->drm); set_data(bus, 1); set_clock(bus, 1); - if (IS_PINEVIEW(i915)) + if (display->platform.pineview) pnv_gmbus_clock_gating(display, true); } @@ -630,14 +626,13 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num, { struct intel_gmbus *bus = to_intel_gmbus(adapter); struct intel_display *display = bus->display; - struct drm_i915_private *i915 = to_i915(display->drm); int i = 0, inc, try = 0; int ret = 0; /* Display WA #0868: skl,bxt,kbl,cfl,glk */ - if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) + if (display->platform.geminilake || display->platform.broxton) bxt_gmbus_clock_gating(display, false); - else if (HAS_PCH_SPT(i915) || HAS_PCH_CNP(i915)) + else if (HAS_PCH_SPT(display) || HAS_PCH_CNP(display)) pch_gmbus_clock_gating(display, false); retry: @@ -748,9 +743,9 @@ timeout: out: /* Display WA #0868: skl,bxt,kbl,cfl,glk */ - if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) + if (display->platform.geminilake || display->platform.broxton) bxt_gmbus_clock_gating(display, true); - else if (HAS_PCH_SPT(i915) || HAS_PCH_CNP(i915)) + else if (HAS_PCH_SPT(display) || HAS_PCH_CNP(display)) pch_gmbus_clock_gating(display, true); return ret; @@ -873,12 +868,11 @@ static const struct i2c_lock_operations gmbus_lock_ops = { */ int intel_gmbus_setup(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); struct pci_dev *pdev = to_pci_dev(display->drm->dev); unsigned int pin; int ret; - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + if (display->platform.valleyview || display->platform.cherryview) display->gmbus.mmio_base = VLV_DISPLAY_BASE; else if (!HAS_GMCH(display)) /* @@ -925,7 +919,7 @@ int intel_gmbus_setup(struct intel_display *display) bus->reg0 = pin | GMBUS_RATE_100KHZ; /* gmbus seems to be broken on i830 */ - if (IS_I830(i915)) + if (display->platform.i830) bus->force_bit = 1; intel_gpio_setup(bus, GPIO(display, gmbus_pin->gpio)); diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 411f17655f89..3e3038f4ee1f 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -27,10 +27,13 @@ #include "intel_dp_mst.h" #include "intel_hdcp.h" #include "intel_hdcp_gsc.h" +#include "intel_hdcp_gsc_message.h" #include "intel_hdcp_regs.h" #include "intel_hdcp_shim.h" #include "intel_pcode.h" +#define USE_HDCP_GSC(__display) (DISPLAY_VER(__display) >= 14) + #define KEY_LOAD_TRIES 5 #define HDCP2_LC_RETRY_CNT 3 @@ -250,8 +253,8 @@ static bool intel_hdcp2_prerequisite(struct intel_connector *connector) return false; /* If MTL+ make sure gsc is loaded and proxy is setup */ - if (intel_hdcp_gsc_cs_required(display)) { - if 
(!intel_hdcp_gsc_check_status(display)) + if (USE_HDCP_GSC(display)) { + if (!intel_hdcp_gsc_check_status(display->drm)) return false; } @@ -2339,7 +2342,7 @@ static int initialize_hdcp_port_data(struct intel_connector *connector, static bool is_hdcp2_supported(struct intel_display *display) { - if (intel_hdcp_gsc_cs_required(display)) + if (USE_HDCP_GSC(display)) return true; if (!IS_ENABLED(CONFIG_INTEL_MEI_HDCP)) @@ -2363,7 +2366,7 @@ void intel_hdcp_component_init(struct intel_display *display) display->hdcp.comp_added = true; mutex_unlock(&display->hdcp.hdcp_mutex); - if (intel_hdcp_gsc_cs_required(display)) + if (USE_HDCP_GSC(display)) ret = intel_hdcp_gsc_init(display); else ret = component_add_typed(display->drm->dev, &i915_hdcp_ops, @@ -2638,7 +2641,7 @@ void intel_hdcp_component_fini(struct intel_display *display) display->hdcp.comp_added = false; mutex_unlock(&display->hdcp.hdcp_mutex); - if (intel_hdcp_gsc_cs_required(display)) + if (USE_HDCP_GSC(display)) intel_hdcp_gsc_fini(display); else component_del(display->drm->dev, &i915_hdcp_ops); diff --git a/drivers/gpu/drm/i915/display/intel_hdcp_gsc.c b/drivers/gpu/drm/i915/display/intel_hdcp_gsc.c index 55965844d829..6a22862d6be1 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp_gsc.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp_gsc.c @@ -11,27 +11,22 @@ #include "i915_drv.h" #include "i915_utils.h" #include "intel_hdcp_gsc.h" -#include "intel_hdcp_gsc_message.h" -struct intel_hdcp_gsc_message { +struct intel_hdcp_gsc_context { + struct drm_i915_private *i915; struct i915_vma *vma; void *hdcp_cmd_in; void *hdcp_cmd_out; }; -bool intel_hdcp_gsc_cs_required(struct intel_display *display) +bool intel_hdcp_gsc_check_status(struct drm_device *drm) { - return DISPLAY_VER(display) >= 14; -} - -bool intel_hdcp_gsc_check_status(struct intel_display *display) -{ - struct drm_i915_private *i915 = to_i915(display->drm); + struct drm_i915_private *i915 = to_i915(drm); struct intel_gt *gt = i915->media_gt; struct intel_gsc_uc *gsc = gt ? 
&gt->uc.gsc : NULL; if (!gsc || !intel_uc_fw_is_running(&gsc->fw)) { - drm_dbg_kms(display->drm, + drm_dbg_kms(&i915->drm, "GSC components required for HDCP2.2 are not ready\n"); return false; } @@ -41,7 +36,7 @@ bool intel_hdcp_gsc_check_status(struct intel_display *display) /*This function helps allocate memory for the command that we will send to gsc cs */ static int intel_hdcp_gsc_initialize_message(struct drm_i915_private *i915, - struct intel_hdcp_gsc_message *hdcp_message) + struct intel_hdcp_gsc_context *gsc_context) { struct intel_gt *gt = i915->media_gt; struct drm_i915_gem_object *obj = NULL; @@ -78,9 +73,10 @@ static int intel_hdcp_gsc_initialize_message(struct drm_i915_private *i915, memset(cmd_in, 0, obj->base.size); - hdcp_message->hdcp_cmd_in = cmd_in; - hdcp_message->hdcp_cmd_out = cmd_out; - hdcp_message->vma = vma; + gsc_context->hdcp_cmd_in = cmd_in; + gsc_context->hdcp_cmd_out = cmd_out; + gsc_context->vma = vma; + gsc_context->i915 = i915; return 0; @@ -91,80 +87,37 @@ out_unpin: return err; } -static const struct i915_hdcp_ops gsc_hdcp_ops = { - .initiate_hdcp2_session = intel_hdcp_gsc_initiate_session, - .verify_receiver_cert_prepare_km = - intel_hdcp_gsc_verify_receiver_cert_prepare_km, - .verify_hprime = intel_hdcp_gsc_verify_hprime, - .store_pairing_info = intel_hdcp_gsc_store_pairing_info, - .initiate_locality_check = intel_hdcp_gsc_initiate_locality_check, - .verify_lprime = intel_hdcp_gsc_verify_lprime, - .get_session_key = intel_hdcp_gsc_get_session_key, - .repeater_check_flow_prepare_ack = - intel_hdcp_gsc_repeater_check_flow_prepare_ack, - .verify_mprime = intel_hdcp_gsc_verify_mprime, - .enable_hdcp_authentication = intel_hdcp_gsc_enable_authentication, - .close_hdcp_session = intel_hdcp_gsc_close_session, -}; - -static int intel_hdcp_gsc_hdcp2_init(struct intel_display *display) +struct intel_hdcp_gsc_context *intel_hdcp_gsc_context_alloc(struct drm_device *drm) { - struct drm_i915_private *i915 = to_i915(display->drm); - struct intel_hdcp_gsc_message *hdcp_message; + struct drm_i915_private *i915 = to_i915(drm); + struct intel_hdcp_gsc_context *gsc_context; int ret; - hdcp_message = kzalloc(sizeof(*hdcp_message), GFP_KERNEL); - - if (!hdcp_message) - return -ENOMEM; + gsc_context = kzalloc(sizeof(*gsc_context), GFP_KERNEL); + if (!gsc_context) + return ERR_PTR(-ENOMEM); /* * NOTE: No need to lock the comp mutex here as it is already * going to be taken before this function called */ - display->hdcp.hdcp_message = hdcp_message; - ret = intel_hdcp_gsc_initialize_message(i915, hdcp_message); - - if (ret) - drm_err(display->drm, "Could not initialize hdcp_message\n"); - - return ret; -} - -static void intel_hdcp_gsc_free_message(struct intel_display *display) -{ - struct intel_hdcp_gsc_message *hdcp_message = - display->hdcp.hdcp_message; + ret = intel_hdcp_gsc_initialize_message(i915, gsc_context); + if (ret) { + drm_err(&i915->drm, "Could not initialize gsc_context\n"); + kfree(gsc_context); + gsc_context = ERR_PTR(ret); + } - hdcp_message->hdcp_cmd_in = NULL; - hdcp_message->hdcp_cmd_out = NULL; - i915_vma_unpin_and_release(&hdcp_message->vma, I915_VMA_RELEASE_MAP); - kfree(hdcp_message); + return gsc_context; } -int intel_hdcp_gsc_init(struct intel_display *display) +void intel_hdcp_gsc_context_free(struct intel_hdcp_gsc_context *gsc_context) { - struct i915_hdcp_arbiter *data; - int ret; - - data = kzalloc(sizeof(struct i915_hdcp_arbiter), GFP_KERNEL); - if (!data) - return -ENOMEM; + if (!gsc_context) + return; - mutex_lock(&display->hdcp.hdcp_mutex); -
display->hdcp.arbiter = data; - display->hdcp.arbiter->hdcp_dev = display->drm->dev; - display->hdcp.arbiter->ops = &gsc_hdcp_ops; - ret = intel_hdcp_gsc_hdcp2_init(display); - mutex_unlock(&display->hdcp.hdcp_mutex); - - return ret; -} - -void intel_hdcp_gsc_fini(struct intel_display *display) -{ - intel_hdcp_gsc_free_message(display); - kfree(display->hdcp.arbiter); + i915_vma_unpin_and_release(&gsc_context->vma, I915_VMA_RELEASE_MAP); + kfree(gsc_context); } static int intel_gsc_send_sync(struct drm_i915_private *i915, @@ -211,18 +164,18 @@ static int intel_gsc_send_sync(struct drm_i915_private *i915, /* * This function can now be used for sending requests and will also handle * receipt of reply messages hence no different function of message retrieval - * is required. We will initialize intel_hdcp_gsc_message structure then add + * is required. We will initialize intel_hdcp_gsc_context structure then add * gsc cs memory header as stated in specs after which the normal HDCP payload * will follow */ -ssize_t intel_hdcp_gsc_msg_send(struct drm_i915_private *i915, u8 *msg_in, - size_t msg_in_len, u8 *msg_out, - size_t msg_out_len) +ssize_t intel_hdcp_gsc_msg_send(struct intel_hdcp_gsc_context *gsc_context, + void *msg_in, size_t msg_in_len, + void *msg_out, size_t msg_out_len) { + struct drm_i915_private *i915 = gsc_context->i915; struct intel_gt *gt = i915->media_gt; struct intel_gsc_mtl_header *header_in, *header_out; const size_t max_msg_size = PAGE_SIZE - sizeof(*header_in); - struct intel_hdcp_gsc_message *hdcp_message; u64 addr_in, addr_out, host_session_id; u32 reply_size, msg_size_in, msg_size_out; int ret, tries = 0; @@ -235,10 +188,9 @@ ssize_t intel_hdcp_gsc_msg_send(struct drm_i915_private *i915, u8 *msg_in, msg_size_in = msg_in_len + sizeof(*header_in); msg_size_out = msg_out_len + sizeof(*header_out); - hdcp_message = i915->display.hdcp.hdcp_message; - header_in = hdcp_message->hdcp_cmd_in; - header_out = hdcp_message->hdcp_cmd_out; - addr_in = i915_ggtt_offset(hdcp_message->vma); + header_in = gsc_context->hdcp_cmd_in; + header_out = gsc_context->hdcp_cmd_out; + addr_in = i915_ggtt_offset(gsc_context->vma); addr_out = addr_in + PAGE_SIZE; memset(header_in, 0, msg_size_in); @@ -246,7 +198,7 @@ ssize_t intel_hdcp_gsc_msg_send(struct drm_i915_private *i915, u8 *msg_in, get_random_bytes(&host_session_id, sizeof(u64)); intel_gsc_uc_heci_cmd_emit_mtl_header(header_in, HECI_MEADDRESS_HDCP, msg_size_in, host_session_id); - memcpy(hdcp_message->hdcp_cmd_in + sizeof(*header_in), msg_in, msg_in_len); + memcpy(gsc_context->hdcp_cmd_in + sizeof(*header_in), msg_in, msg_in_len); /* * Keep sending request in case the pending bit is set no need to add @@ -280,7 +232,7 @@ ssize_t intel_hdcp_gsc_msg_send(struct drm_i915_private *i915, u8 *msg_in, reply_size, (u32)msg_out_len); } - memcpy(msg_out, hdcp_message->hdcp_cmd_out + sizeof(*header_out), msg_out_len); + memcpy(msg_out, gsc_context->hdcp_cmd_out + sizeof(*header_out), msg_out_len); err: return ret; diff --git a/drivers/gpu/drm/i915/display/intel_hdcp_gsc.h b/drivers/gpu/drm/i915/display/intel_hdcp_gsc.h index 5695a5e4f609..9305c14aaffe 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp_gsc.h +++ b/drivers/gpu/drm/i915/display/intel_hdcp_gsc.h @@ -6,19 +6,17 @@ #ifndef __INTEL_HDCP_GSC_H__ #define __INTEL_HDCP_GSC_H__ -#include <linux/err.h> #include <linux/types.h> -struct drm_i915_private; -struct intel_display; -struct intel_hdcp_gsc_message; +struct drm_device; +struct intel_hdcp_gsc_context; -bool 
intel_hdcp_gsc_cs_required(struct intel_display *display); -ssize_t intel_hdcp_gsc_msg_send(struct drm_i915_private *i915, u8 *msg_in, - size_t msg_in_len, u8 *msg_out, - size_t msg_out_len); -int intel_hdcp_gsc_init(struct intel_display *display); -void intel_hdcp_gsc_fini(struct intel_display *display); -bool intel_hdcp_gsc_check_status(struct intel_display *display); +ssize_t intel_hdcp_gsc_msg_send(struct intel_hdcp_gsc_context *gsc_context, + void *msg_in, size_t msg_in_len, + void *msg_out, size_t msg_out_len); +bool intel_hdcp_gsc_check_status(struct drm_device *drm); + +struct intel_hdcp_gsc_context *intel_hdcp_gsc_context_alloc(struct drm_device *drm); +void intel_hdcp_gsc_context_free(struct intel_hdcp_gsc_context *gsc_context); #endif /* __INTEL_HDCP_GCS_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_hdcp_gsc_message.c b/drivers/gpu/drm/i915/display/intel_hdcp_gsc_message.c index 129104fa9b16..98967bb148e3 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp_gsc_message.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp_gsc_message.c @@ -4,20 +4,23 @@ */ #include <linux/err.h> + +#include <drm/drm_print.h> #include <drm/intel/i915_hdcp_interface.h> -#include "i915_drv.h" +#include "intel_display_core.h" #include "intel_display_types.h" +#include "intel_hdcp_gsc.h" #include "intel_hdcp_gsc_message.h" -int +static int intel_hdcp_gsc_initiate_session(struct device *dev, struct hdcp_port_data *data, struct hdcp2_ake_init *ake_data) { struct wired_cmd_initiate_hdcp2_session_in session_init_in = {}; struct wired_cmd_initiate_hdcp2_session_out session_init_out = {}; + struct intel_hdcp_gsc_context *gsc_context; struct intel_display *display; - struct drm_i915_private *i915; ssize_t byte; if (!dev || !data || !ake_data) @@ -28,7 +31,7 @@ intel_hdcp_gsc_initiate_session(struct device *dev, struct hdcp_port_data *data, dev_err(dev, "DRM not initialized, aborting HDCP.\n"); return -ENODEV; } - i915 = to_i915(display->drm); + gsc_context = display->hdcp.gsc_context; session_init_in.header.api_version = HDCP_API_VERSION; session_init_in.header.command_id = WIRED_INITIATE_HDCP2_SESSION; @@ -41,9 +44,9 @@ intel_hdcp_gsc_initiate_session(struct device *dev, struct hdcp_port_data *data, session_init_in.port.attached_transcoder = (u8)data->hdcp_transcoder; session_init_in.protocol = data->protocol; - byte = intel_hdcp_gsc_msg_send(i915, (u8 *)&session_init_in, + byte = intel_hdcp_gsc_msg_send(gsc_context, &session_init_in, sizeof(session_init_in), - (u8 *)&session_init_out, + &session_init_out, sizeof(session_init_out)); if (byte < 0) { drm_dbg_kms(display->drm, "intel_hdcp_gsc_msg_send failed. 
%zd\n", byte); @@ -64,7 +67,7 @@ intel_hdcp_gsc_initiate_session(struct device *dev, struct hdcp_port_data *data, return 0; } -int +static int intel_hdcp_gsc_verify_receiver_cert_prepare_km(struct device *dev, struct hdcp_port_data *data, struct hdcp2_ake_send_cert *rx_cert, @@ -75,8 +78,8 @@ intel_hdcp_gsc_verify_receiver_cert_prepare_km(struct device *dev, { struct wired_cmd_verify_receiver_cert_in verify_rxcert_in = {}; struct wired_cmd_verify_receiver_cert_out verify_rxcert_out = {}; + struct intel_hdcp_gsc_context *gsc_context; struct intel_display *display; - struct drm_i915_private *i915; ssize_t byte; if (!dev || !data || !rx_cert || !km_stored || !ek_pub_km || !msg_sz) @@ -87,7 +90,7 @@ intel_hdcp_gsc_verify_receiver_cert_prepare_km(struct device *dev, dev_err(dev, "DRM not initialized, aborting HDCP.\n"); return -ENODEV; } - i915 = to_i915(display->drm); + gsc_context = display->hdcp.gsc_context; verify_rxcert_in.header.api_version = HDCP_API_VERSION; verify_rxcert_in.header.command_id = WIRED_VERIFY_RECEIVER_CERT; @@ -103,9 +106,9 @@ intel_hdcp_gsc_verify_receiver_cert_prepare_km(struct device *dev, memcpy(verify_rxcert_in.r_rx, &rx_cert->r_rx, HDCP_2_2_RRX_LEN); memcpy(verify_rxcert_in.rx_caps, rx_cert->rx_caps, HDCP_2_2_RXCAPS_LEN); - byte = intel_hdcp_gsc_msg_send(i915, (u8 *)&verify_rxcert_in, + byte = intel_hdcp_gsc_msg_send(gsc_context, &verify_rxcert_in, sizeof(verify_rxcert_in), - (u8 *)&verify_rxcert_out, + &verify_rxcert_out, sizeof(verify_rxcert_out)); if (byte < 0) { drm_dbg_kms(display->drm, "intel_hdcp_gsc_msg_send failed: %zd\n", byte); @@ -134,14 +137,14 @@ intel_hdcp_gsc_verify_receiver_cert_prepare_km(struct device *dev, return 0; } -int +static int intel_hdcp_gsc_verify_hprime(struct device *dev, struct hdcp_port_data *data, struct hdcp2_ake_send_hprime *rx_hprime) { struct wired_cmd_ake_send_hprime_in send_hprime_in = {}; struct wired_cmd_ake_send_hprime_out send_hprime_out = {}; + struct intel_hdcp_gsc_context *gsc_context; struct intel_display *display; - struct drm_i915_private *i915; ssize_t byte; if (!dev || !data || !rx_hprime) @@ -152,7 +155,7 @@ intel_hdcp_gsc_verify_hprime(struct device *dev, struct hdcp_port_data *data, dev_err(dev, "DRM not initialized, aborting HDCP.\n"); return -ENODEV; } - i915 = to_i915(display->drm); + gsc_context = display->hdcp.gsc_context; send_hprime_in.header.api_version = HDCP_API_VERSION; send_hprime_in.header.command_id = WIRED_AKE_SEND_HPRIME; @@ -166,9 +169,9 @@ intel_hdcp_gsc_verify_hprime(struct device *dev, struct hdcp_port_data *data, memcpy(send_hprime_in.h_prime, rx_hprime->h_prime, HDCP_2_2_H_PRIME_LEN); - byte = intel_hdcp_gsc_msg_send(i915, (u8 *)&send_hprime_in, + byte = intel_hdcp_gsc_msg_send(gsc_context, &send_hprime_in, sizeof(send_hprime_in), - (u8 *)&send_hprime_out, + &send_hprime_out, sizeof(send_hprime_out)); if (byte < 0) { drm_dbg_kms(display->drm, "intel_hdcp_gsc_msg_send failed. 
%zd\n", byte); @@ -184,14 +187,14 @@ intel_hdcp_gsc_verify_hprime(struct device *dev, struct hdcp_port_data *data, return 0; } -int +static int intel_hdcp_gsc_store_pairing_info(struct device *dev, struct hdcp_port_data *data, struct hdcp2_ake_send_pairing_info *pairing_info) { struct wired_cmd_ake_send_pairing_info_in pairing_info_in = {}; struct wired_cmd_ake_send_pairing_info_out pairing_info_out = {}; + struct intel_hdcp_gsc_context *gsc_context; struct intel_display *display; - struct drm_i915_private *i915; ssize_t byte; if (!dev || !data || !pairing_info) @@ -202,7 +205,7 @@ intel_hdcp_gsc_store_pairing_info(struct device *dev, struct hdcp_port_data *dat dev_err(dev, "DRM not initialized, aborting HDCP.\n"); return -ENODEV; } - i915 = to_i915(display->drm); + gsc_context = display->hdcp.gsc_context; pairing_info_in.header.api_version = HDCP_API_VERSION; pairing_info_in.header.command_id = WIRED_AKE_SEND_PAIRING_INFO; @@ -217,9 +220,9 @@ intel_hdcp_gsc_store_pairing_info(struct device *dev, struct hdcp_port_data *dat memcpy(pairing_info_in.e_kh_km, pairing_info->e_kh_km, HDCP_2_2_E_KH_KM_LEN); - byte = intel_hdcp_gsc_msg_send(i915, (u8 *)&pairing_info_in, + byte = intel_hdcp_gsc_msg_send(gsc_context, &pairing_info_in, sizeof(pairing_info_in), - (u8 *)&pairing_info_out, + &pairing_info_out, sizeof(pairing_info_out)); if (byte < 0) { drm_dbg_kms(display->drm, "intel_hdcp_gsc_msg_send failed. %zd\n", byte); @@ -236,15 +239,15 @@ intel_hdcp_gsc_store_pairing_info(struct device *dev, struct hdcp_port_data *dat return 0; } -int +static int intel_hdcp_gsc_initiate_locality_check(struct device *dev, struct hdcp_port_data *data, struct hdcp2_lc_init *lc_init_data) { struct wired_cmd_init_locality_check_in lc_init_in = {}; struct wired_cmd_init_locality_check_out lc_init_out = {}; + struct intel_hdcp_gsc_context *gsc_context; struct intel_display *display; - struct drm_i915_private *i915; ssize_t byte; if (!dev || !data || !lc_init_data) @@ -255,7 +258,7 @@ intel_hdcp_gsc_initiate_locality_check(struct device *dev, dev_err(dev, "DRM not initialized, aborting HDCP.\n"); return -ENODEV; } - i915 = to_i915(display->drm); + gsc_context = display->hdcp.gsc_context; lc_init_in.header.api_version = HDCP_API_VERSION; lc_init_in.header.command_id = WIRED_INIT_LOCALITY_CHECK; @@ -266,8 +269,8 @@ intel_hdcp_gsc_initiate_locality_check(struct device *dev, lc_init_in.port.physical_port = (u8)data->hdcp_ddi; lc_init_in.port.attached_transcoder = (u8)data->hdcp_transcoder; - byte = intel_hdcp_gsc_msg_send(i915, (u8 *)&lc_init_in, sizeof(lc_init_in), - (u8 *)&lc_init_out, sizeof(lc_init_out)); + byte = intel_hdcp_gsc_msg_send(gsc_context, &lc_init_in, sizeof(lc_init_in), + &lc_init_out, sizeof(lc_init_out)); if (byte < 0) { drm_dbg_kms(display->drm, "intel_hdcp_gsc_msg_send failed. 
%zd\n", byte); return byte; @@ -285,14 +288,14 @@ intel_hdcp_gsc_initiate_locality_check(struct device *dev, return 0; } -int +static int intel_hdcp_gsc_verify_lprime(struct device *dev, struct hdcp_port_data *data, struct hdcp2_lc_send_lprime *rx_lprime) { struct wired_cmd_validate_locality_in verify_lprime_in = {}; struct wired_cmd_validate_locality_out verify_lprime_out = {}; + struct intel_hdcp_gsc_context *gsc_context; struct intel_display *display; - struct drm_i915_private *i915; ssize_t byte; if (!dev || !data || !rx_lprime) @@ -303,7 +306,7 @@ intel_hdcp_gsc_verify_lprime(struct device *dev, struct hdcp_port_data *data, dev_err(dev, "DRM not initialized, aborting HDCP.\n"); return -ENODEV; } - i915 = to_i915(display->drm); + gsc_context = display->hdcp.gsc_context; verify_lprime_in.header.api_version = HDCP_API_VERSION; verify_lprime_in.header.command_id = WIRED_VALIDATE_LOCALITY; @@ -318,9 +321,9 @@ intel_hdcp_gsc_verify_lprime(struct device *dev, struct hdcp_port_data *data, memcpy(verify_lprime_in.l_prime, rx_lprime->l_prime, HDCP_2_2_L_PRIME_LEN); - byte = intel_hdcp_gsc_msg_send(i915, (u8 *)&verify_lprime_in, + byte = intel_hdcp_gsc_msg_send(gsc_context, &verify_lprime_in, sizeof(verify_lprime_in), - (u8 *)&verify_lprime_out, + &verify_lprime_out, sizeof(verify_lprime_out)); if (byte < 0) { drm_dbg_kms(display->drm, "intel_hdcp_gsc_msg_send failed. %zd\n", byte); @@ -337,14 +340,15 @@ intel_hdcp_gsc_verify_lprime(struct device *dev, struct hdcp_port_data *data, return 0; } -int intel_hdcp_gsc_get_session_key(struct device *dev, - struct hdcp_port_data *data, - struct hdcp2_ske_send_eks *ske_data) +static int +intel_hdcp_gsc_get_session_key(struct device *dev, + struct hdcp_port_data *data, + struct hdcp2_ske_send_eks *ske_data) { struct wired_cmd_get_session_key_in get_skey_in = {}; struct wired_cmd_get_session_key_out get_skey_out = {}; + struct intel_hdcp_gsc_context *gsc_context; struct intel_display *display; - struct drm_i915_private *i915; ssize_t byte; if (!dev || !data || !ske_data) @@ -355,7 +359,7 @@ int intel_hdcp_gsc_get_session_key(struct device *dev, dev_err(dev, "DRM not initialized, aborting HDCP.\n"); return -ENODEV; } - i915 = to_i915(display->drm); + gsc_context = display->hdcp.gsc_context; get_skey_in.header.api_version = HDCP_API_VERSION; get_skey_in.header.command_id = WIRED_GET_SESSION_KEY; @@ -366,8 +370,8 @@ int intel_hdcp_gsc_get_session_key(struct device *dev, get_skey_in.port.physical_port = (u8)data->hdcp_ddi; get_skey_in.port.attached_transcoder = (u8)data->hdcp_transcoder; - byte = intel_hdcp_gsc_msg_send(i915, (u8 *)&get_skey_in, sizeof(get_skey_in), - (u8 *)&get_skey_out, sizeof(get_skey_out)); + byte = intel_hdcp_gsc_msg_send(gsc_context, &get_skey_in, sizeof(get_skey_in), + &get_skey_out, sizeof(get_skey_out)); if (byte < 0) { drm_dbg_kms(display->drm, "intel_hdcp_gsc_msg_send failed. 
%zd\n", byte); return byte; @@ -387,7 +391,7 @@ int intel_hdcp_gsc_get_session_key(struct device *dev, return 0; } -int +static int intel_hdcp_gsc_repeater_check_flow_prepare_ack(struct device *dev, struct hdcp_port_data *data, struct hdcp2_rep_send_receiverid_list @@ -397,8 +401,8 @@ intel_hdcp_gsc_repeater_check_flow_prepare_ack(struct device *dev, { struct wired_cmd_verify_repeater_in verify_repeater_in = {}; struct wired_cmd_verify_repeater_out verify_repeater_out = {}; + struct intel_hdcp_gsc_context *gsc_context; struct intel_display *display; - struct drm_i915_private *i915; ssize_t byte; if (!dev || !rep_topology || !rep_send_ack || !data) @@ -409,7 +413,7 @@ intel_hdcp_gsc_repeater_check_flow_prepare_ack(struct device *dev, dev_err(dev, "DRM not initialized, aborting HDCP.\n"); return -ENODEV; } - i915 = to_i915(display->drm); + gsc_context = display->hdcp.gsc_context; verify_repeater_in.header.api_version = HDCP_API_VERSION; verify_repeater_in.header.command_id = WIRED_VERIFY_REPEATER; @@ -430,9 +434,9 @@ intel_hdcp_gsc_repeater_check_flow_prepare_ack(struct device *dev, memcpy(verify_repeater_in.receiver_ids, rep_topology->receiver_ids, HDCP_2_2_RECEIVER_IDS_MAX_LEN); - byte = intel_hdcp_gsc_msg_send(i915, (u8 *)&verify_repeater_in, + byte = intel_hdcp_gsc_msg_send(gsc_context, &verify_repeater_in, sizeof(verify_repeater_in), - (u8 *)&verify_repeater_out, + &verify_repeater_out, sizeof(verify_repeater_out)); if (byte < 0) { drm_dbg_kms(display->drm, "intel_hdcp_gsc_msg_send failed. %zd\n", byte); @@ -453,14 +457,15 @@ intel_hdcp_gsc_repeater_check_flow_prepare_ack(struct device *dev, return 0; } -int intel_hdcp_gsc_verify_mprime(struct device *dev, - struct hdcp_port_data *data, - struct hdcp2_rep_stream_ready *stream_ready) +static int +intel_hdcp_gsc_verify_mprime(struct device *dev, + struct hdcp_port_data *data, + struct hdcp2_rep_stream_ready *stream_ready) { struct wired_cmd_repeater_auth_stream_req_in *verify_mprime_in; struct wired_cmd_repeater_auth_stream_req_out verify_mprime_out = {}; + struct intel_hdcp_gsc_context *gsc_context; struct intel_display *display; - struct drm_i915_private *i915; ssize_t byte; size_t cmd_size; @@ -472,7 +477,7 @@ int intel_hdcp_gsc_verify_mprime(struct device *dev, dev_err(dev, "DRM not initialized, aborting HDCP.\n"); return -ENODEV; } - i915 = to_i915(display->drm); + gsc_context = display->hdcp.gsc_context; cmd_size = struct_size(verify_mprime_in, streams, data->k); if (cmd_size == SIZE_MAX) @@ -499,8 +504,8 @@ int intel_hdcp_gsc_verify_mprime(struct device *dev, verify_mprime_in->k = cpu_to_be16(data->k); - byte = intel_hdcp_gsc_msg_send(i915, (u8 *)verify_mprime_in, cmd_size, - (u8 *)&verify_mprime_out, + byte = intel_hdcp_gsc_msg_send(gsc_context, verify_mprime_in, cmd_size, + &verify_mprime_out, sizeof(verify_mprime_out)); kfree(verify_mprime_in); if (byte < 0) { @@ -518,13 +523,13 @@ int intel_hdcp_gsc_verify_mprime(struct device *dev, return 0; } -int intel_hdcp_gsc_enable_authentication(struct device *dev, - struct hdcp_port_data *data) +static int intel_hdcp_gsc_enable_authentication(struct device *dev, + struct hdcp_port_data *data) { struct wired_cmd_enable_auth_in enable_auth_in = {}; struct wired_cmd_enable_auth_out enable_auth_out = {}; + struct intel_hdcp_gsc_context *gsc_context; struct intel_display *display; - struct drm_i915_private *i915; ssize_t byte; if (!dev || !data) @@ -535,7 +540,7 @@ int intel_hdcp_gsc_enable_authentication(struct device *dev, dev_err(dev, "DRM not initialized, aborting HDCP.\n"); return 
-ENODEV; } - i915 = to_i915(display->drm); + gsc_context = display->hdcp.gsc_context; enable_auth_in.header.api_version = HDCP_API_VERSION; enable_auth_in.header.command_id = WIRED_ENABLE_AUTH; @@ -547,9 +552,9 @@ int intel_hdcp_gsc_enable_authentication(struct device *dev, enable_auth_in.port.attached_transcoder = (u8)data->hdcp_transcoder; enable_auth_in.stream_type = data->streams[0].stream_type; - byte = intel_hdcp_gsc_msg_send(i915, (u8 *)&enable_auth_in, + byte = intel_hdcp_gsc_msg_send(gsc_context, &enable_auth_in, sizeof(enable_auth_in), - (u8 *)&enable_auth_out, + &enable_auth_out, sizeof(enable_auth_out)); if (byte < 0) { drm_dbg_kms(display->drm, "intel_hdcp_gsc_msg_send failed. %zd\n", byte); @@ -565,13 +570,13 @@ int intel_hdcp_gsc_enable_authentication(struct device *dev, return 0; } -int +static int intel_hdcp_gsc_close_session(struct device *dev, struct hdcp_port_data *data) { struct wired_cmd_close_session_in session_close_in = {}; struct wired_cmd_close_session_out session_close_out = {}; + struct intel_hdcp_gsc_context *gsc_context; struct intel_display *display; - struct drm_i915_private *i915; ssize_t byte; if (!dev || !data) @@ -582,7 +587,7 @@ intel_hdcp_gsc_close_session(struct device *dev, struct hdcp_port_data *data) dev_err(dev, "DRM not initialized, aborting HDCP.\n"); return -ENODEV; } - i915 = to_i915(display->drm); + gsc_context = display->hdcp.gsc_context; session_close_in.header.api_version = HDCP_API_VERSION; session_close_in.header.command_id = WIRED_CLOSE_SESSION; @@ -594,9 +599,9 @@ intel_hdcp_gsc_close_session(struct device *dev, struct hdcp_port_data *data) session_close_in.port.physical_port = (u8)data->hdcp_ddi; session_close_in.port.attached_transcoder = (u8)data->hdcp_transcoder; - byte = intel_hdcp_gsc_msg_send(i915, (u8 *)&session_close_in, + byte = intel_hdcp_gsc_msg_send(gsc_context, &session_close_in, sizeof(session_close_in), - (u8 *)&session_close_out, + &session_close_out, sizeof(session_close_out)); if (byte < 0) { drm_dbg_kms(display->drm, "intel_hdcp_gsc_msg_send failed. 
%zd\n", byte); @@ -611,3 +616,57 @@ intel_hdcp_gsc_close_session(struct device *dev, struct hdcp_port_data *data) return 0; } + +static const struct i915_hdcp_ops gsc_hdcp_ops = { + .initiate_hdcp2_session = intel_hdcp_gsc_initiate_session, + .verify_receiver_cert_prepare_km = + intel_hdcp_gsc_verify_receiver_cert_prepare_km, + .verify_hprime = intel_hdcp_gsc_verify_hprime, + .store_pairing_info = intel_hdcp_gsc_store_pairing_info, + .initiate_locality_check = intel_hdcp_gsc_initiate_locality_check, + .verify_lprime = intel_hdcp_gsc_verify_lprime, + .get_session_key = intel_hdcp_gsc_get_session_key, + .repeater_check_flow_prepare_ack = + intel_hdcp_gsc_repeater_check_flow_prepare_ack, + .verify_mprime = intel_hdcp_gsc_verify_mprime, + .enable_hdcp_authentication = intel_hdcp_gsc_enable_authentication, + .close_hdcp_session = intel_hdcp_gsc_close_session, +}; + +int intel_hdcp_gsc_init(struct intel_display *display) +{ + struct intel_hdcp_gsc_context *gsc_context; + struct i915_hdcp_arbiter *arbiter; + int ret = 0; + + arbiter = kzalloc(sizeof(*arbiter), GFP_KERNEL); + if (!arbiter) + return -ENOMEM; + + mutex_lock(&display->hdcp.hdcp_mutex); + + gsc_context = intel_hdcp_gsc_context_alloc(display->drm); + if (IS_ERR(gsc_context)) { + ret = PTR_ERR(gsc_context); + kfree(arbiter); + goto out; + } + + display->hdcp.arbiter = arbiter; + display->hdcp.arbiter->hdcp_dev = display->drm->dev; + display->hdcp.arbiter->ops = &gsc_hdcp_ops; + display->hdcp.gsc_context = gsc_context; + +out: + mutex_unlock(&display->hdcp.hdcp_mutex); + + return ret; +} + +void intel_hdcp_gsc_fini(struct intel_display *display) +{ + intel_hdcp_gsc_context_free(display->hdcp.gsc_context); + display->hdcp.gsc_context = NULL; + kfree(display->hdcp.arbiter); + display->hdcp.arbiter = NULL; +} diff --git a/drivers/gpu/drm/i915/display/intel_hdcp_gsc_message.h b/drivers/gpu/drm/i915/display/intel_hdcp_gsc_message.h index 2d597f27e931..9f54157a4a3e 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp_gsc_message.h +++ b/drivers/gpu/drm/i915/display/intel_hdcp_gsc_message.h @@ -6,68 +6,9 @@ #ifndef __INTEL_HDCP_GSC_MESSAGE_H__ #define __INTEL_HDCP_GSC_MESSAGE_H__ -#include <linux/types.h> - -struct device; -struct drm_i915_private; -struct hdcp_port_data; -struct hdcp2_ake_init; -struct hdcp2_ake_send_cert; -struct hdcp2_ake_no_stored_km; -struct hdcp2_ake_send_hprime; -struct hdcp2_ake_send_pairing_info; -struct hdcp2_lc_init; -struct hdcp2_lc_send_lprime; -struct hdcp2_ske_send_eks; -struct hdcp2_rep_send_receiverid_list; -struct hdcp2_rep_send_ack; -struct hdcp2_rep_stream_ready; struct intel_display; -ssize_t intel_hdcp_gsc_msg_send(struct drm_i915_private *i915, u8 *msg_in, - size_t msg_in_len, u8 *msg_out, - size_t msg_out_len); -bool intel_hdcp_gsc_check_status(struct intel_display *display); -int -intel_hdcp_gsc_initiate_session(struct device *dev, struct hdcp_port_data *data, - struct hdcp2_ake_init *ake_data); -int -intel_hdcp_gsc_verify_receiver_cert_prepare_km(struct device *dev, - struct hdcp_port_data *data, - struct hdcp2_ake_send_cert *rx_cert, - bool *km_stored, - struct hdcp2_ake_no_stored_km - *ek_pub_km, - size_t *msg_sz); -int -intel_hdcp_gsc_verify_hprime(struct device *dev, struct hdcp_port_data *data, - struct hdcp2_ake_send_hprime *rx_hprime); -int -intel_hdcp_gsc_store_pairing_info(struct device *dev, struct hdcp_port_data *data, - struct hdcp2_ake_send_pairing_info *pairing_info); -int -intel_hdcp_gsc_initiate_locality_check(struct device *dev, - struct hdcp_port_data *data, - struct hdcp2_lc_init 
*lc_init_data); -int -intel_hdcp_gsc_verify_lprime(struct device *dev, struct hdcp_port_data *data, - struct hdcp2_lc_send_lprime *rx_lprime); -int intel_hdcp_gsc_get_session_key(struct device *dev, - struct hdcp_port_data *data, - struct hdcp2_ske_send_eks *ske_data); -int -intel_hdcp_gsc_repeater_check_flow_prepare_ack(struct device *dev, - struct hdcp_port_data *data, - struct hdcp2_rep_send_receiverid_list - *rep_topology, - struct hdcp2_rep_send_ack - *rep_send_ack); -int intel_hdcp_gsc_verify_mprime(struct device *dev, - struct hdcp_port_data *data, - struct hdcp2_rep_stream_ready *stream_ready); -int intel_hdcp_gsc_enable_authentication(struct device *dev, - struct hdcp_port_data *data); -int -intel_hdcp_gsc_close_session(struct device *dev, struct hdcp_port_data *data); +int intel_hdcp_gsc_init(struct intel_display *display); +void intel_hdcp_gsc_fini(struct intel_display *display); #endif /* __INTEL_HDCP_GSC_MESSAGE_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index f9fa17e1f584..98033471902c 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -38,14 +38,15 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> #include <drm/drm_edid.h> +#include <drm/drm_print.h> #include <drm/drm_probe_helper.h> #include <drm/intel/intel_lpe_audio.h> #include <media/cec-notifier.h> #include "g4x_hdmi.h" -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_atomic.h" #include "intel_audio.h" #include "intel_connector.h" @@ -715,7 +716,7 @@ intel_hdmi_compute_avi_infoframe(struct intel_encoder *encoder, struct hdmi_avi_infoframe *frame = &crtc_state->infoframes.avi.avi; const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - struct drm_connector *connector = conn_state->connector; + struct intel_connector *connector = to_intel_connector(conn_state->connector); int ret; if (!crtc_state->has_infoframe) @@ -724,7 +725,7 @@ intel_hdmi_compute_avi_infoframe(struct intel_encoder *encoder, crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(HDMI_INFOFRAME_TYPE_AVI); - ret = drm_hdmi_avi_infoframe_from_display_mode(frame, connector, + ret = drm_hdmi_avi_infoframe_from_display_mode(frame, &connector->base, adjusted_mode); if (ret) return false; @@ -743,7 +744,7 @@ intel_hdmi_compute_avi_infoframe(struct intel_encoder *encoder, crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB); if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_RGB) { - drm_hdmi_avi_infoframe_quant_range(frame, connector, + drm_hdmi_avi_infoframe_quant_range(frame, &connector->base, adjusted_mode, crtc_state->limited_color_range ? 
HDMI_QUANTIZATION_RANGE_LIMITED : @@ -769,7 +770,7 @@ intel_hdmi_compute_spd_infoframe(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(crtc_state); struct hdmi_spd_infoframe *frame = &crtc_state->infoframes.spd.spd; int ret; @@ -779,7 +780,7 @@ intel_hdmi_compute_spd_infoframe(struct intel_encoder *encoder, crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(HDMI_INFOFRAME_TYPE_SPD); - if (IS_DGFX(i915)) + if (display->platform.dgfx) ret = hdmi_spd_infoframe_init(frame, "Intel", "Discrete gfx"); else ret = hdmi_spd_infoframe_init(frame, "Intel", "Integrated gfx"); @@ -979,7 +980,6 @@ static bool intel_hdmi_set_gcp_infoframe(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); i915_reg_t reg; @@ -989,9 +989,9 @@ static bool intel_hdmi_set_gcp_infoframe(struct intel_encoder *encoder, if (HAS_DDI(display)) reg = HSW_TVIDEO_DIP_GCP(display, crtc_state->cpu_transcoder); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + else if (display->platform.valleyview || display->platform.cherryview) reg = VLV_TVIDEO_DIP_GCP(crtc->pipe); - else if (HAS_PCH_SPLIT(dev_priv)) + else if (HAS_PCH_SPLIT(display)) reg = TVIDEO_DIP_GCP(crtc->pipe); else return false; @@ -1005,7 +1005,6 @@ void intel_hdmi_read_gcp_infoframe(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); i915_reg_t reg; @@ -1015,9 +1014,9 @@ void intel_hdmi_read_gcp_infoframe(struct intel_encoder *encoder, if (HAS_DDI(display)) reg = HSW_TVIDEO_DIP_GCP(display, crtc_state->cpu_transcoder); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + else if (display->platform.valleyview || display->platform.cherryview) reg = VLV_TVIDEO_DIP_GCP(crtc->pipe); - else if (HAS_PCH_SPLIT(dev_priv)) + else if (HAS_PCH_SPLIT(display)) reg = TVIDEO_DIP_GCP(crtc->pipe); else return; @@ -1029,9 +1028,9 @@ static void intel_hdmi_compute_gcp_infoframe(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); - if (IS_G4X(dev_priv) || !crtc_state->has_infoframe) + if (display->platform.g4x || !crtc_state->has_infoframe) return; crtc_state->infoframes.enable |= @@ -1539,7 +1538,6 @@ int intel_hdmi_hdcp_toggle_signalling(struct intel_digital_port *dig_port, struct intel_display *display = to_intel_display(dig_port); struct intel_hdmi *hdmi = &dig_port->hdmi; struct intel_connector *connector = hdmi->attached_connector; - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); int ret; if (!enable) @@ -1558,7 +1556,7 @@ int intel_hdmi_hdcp_toggle_signalling(struct intel_digital_port *dig_port, * WA: To fix incorrect positioning of the window of * opportunity and enc_en signalling in KABYLAKE. 
*/ - if (IS_KABYLAKE(dev_priv) && enable) + if (display->platform.kabylake && enable) return kbl_repositioning_enc_en_signal(connector, cpu_transcoder); @@ -1570,7 +1568,6 @@ bool intel_hdmi_hdcp_check_link_once(struct intel_digital_port *dig_port, struct intel_connector *connector) { struct intel_display *display = to_intel_display(dig_port); - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); enum port port = dig_port->base.port; enum transcoder cpu_transcoder = connector->hdcp.cpu_transcoder; int ret; @@ -1583,15 +1580,15 @@ bool intel_hdmi_hdcp_check_link_once(struct intel_digital_port *dig_port, if (ret) return false; - intel_de_write(i915, HDCP_RPRIME(i915, cpu_transcoder, port), ri.reg); + intel_de_write(display, HDCP_RPRIME(display, cpu_transcoder, port), ri.reg); /* Wait for Ri prime match */ - if (wait_for((intel_de_read(i915, HDCP_STATUS(i915, cpu_transcoder, port)) & + if (wait_for((intel_de_read(display, HDCP_STATUS(display, cpu_transcoder, port)) & (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC)) == (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) { drm_dbg_kms(display->drm, "Ri' mismatch detected (%x)\n", - intel_de_read(i915, HDCP_STATUS(i915, cpu_transcoder, - port))); + intel_de_read(display, HDCP_STATUS(display, cpu_transcoder, + port))); return false; } return true; @@ -1814,14 +1811,13 @@ static const struct intel_hdcp_shim intel_hdmi_hdcp_shim = { static int intel_hdmi_source_max_tmds_clock(struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int max_tmds_clock, vbt_max_tmds_clock; - if (DISPLAY_VER(display) >= 13 || IS_ALDERLAKE_S(dev_priv)) + if (DISPLAY_VER(display) >= 13 || display->platform.alderlake_s) max_tmds_clock = 600000; else if (DISPLAY_VER(display) >= 10) max_tmds_clock = 594000; - else if (DISPLAY_VER(display) >= 8 || IS_HASWELL(dev_priv)) + else if (DISPLAY_VER(display) >= 8 || display->platform.haswell) max_tmds_clock = 300000; else if (DISPLAY_VER(display) >= 5) max_tmds_clock = 225000; @@ -1880,7 +1876,6 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi, bool has_hdmi_sink) { struct intel_display *display = to_intel_display(hdmi); - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_encoder *encoder = &hdmi_to_dig_port(hdmi)->base; if (clock < 25000) @@ -1890,16 +1885,16 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi, return MODE_CLOCK_HIGH; /* GLK DPLL can't generate 446-480 MHz */ - if (IS_GEMINILAKE(dev_priv) && clock > 446666 && clock < 480000) + if (display->platform.geminilake && clock > 446666 && clock < 480000) return MODE_CLOCK_RANGE; /* BXT/GLK DPLL can't generate 223-240 MHz */ - if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) && + if ((display->platform.geminilake || display->platform.broxton) && clock > 223333 && clock < 240000) return MODE_CLOCK_RANGE; /* CHV DPLL can't generate 216-240 MHz */ - if (IS_CHERRYVIEW(dev_priv) && clock > 216000 && clock < 240000) + if (display->platform.cherryview && clock > 216000 && clock < 240000) return MODE_CLOCK_RANGE; /* ICL+ combo PHY PLL can't generate 500-533.2 MHz */ @@ -1943,11 +1938,12 @@ static bool intel_hdmi_source_bpc_possible(struct intel_display *display, int bp } } -static bool intel_hdmi_sink_bpc_possible(struct drm_connector *connector, +static bool intel_hdmi_sink_bpc_possible(struct drm_connector *_connector, int bpc, bool has_hdmi_sink, enum intel_output_format sink_format) { - const struct drm_display_info *info = &connector->display_info; + 
struct intel_connector *connector = to_intel_connector(_connector); + const struct drm_display_info *info = &connector->base.display_info; const struct drm_hdmi_info *hdmi = &info->hdmi; switch (bpc) { @@ -1976,12 +1972,13 @@ static bool intel_hdmi_sink_bpc_possible(struct drm_connector *connector, } static enum drm_mode_status -intel_hdmi_mode_clock_valid(struct drm_connector *connector, int clock, +intel_hdmi_mode_clock_valid(struct drm_connector *_connector, int clock, bool has_hdmi_sink, enum intel_output_format sink_format) { - struct intel_display *display = to_intel_display(connector->dev); - struct intel_hdmi *hdmi = intel_attached_hdmi(to_intel_connector(connector)); + struct intel_connector *connector = to_intel_connector(_connector); + struct intel_display *display = to_intel_display(connector); + struct intel_hdmi *hdmi = intel_attached_hdmi(connector); enum drm_mode_status status = MODE_OK; int bpc; @@ -1996,7 +1993,8 @@ intel_hdmi_mode_clock_valid(struct drm_connector *connector, int clock, if (!intel_hdmi_source_bpc_possible(display, bpc)) continue; - if (!intel_hdmi_sink_bpc_possible(connector, bpc, has_hdmi_sink, sink_format)) + if (!intel_hdmi_sink_bpc_possible(&connector->base, bpc, has_hdmi_sink, + sink_format)) continue; status = hdmi_port_clock_valid(hdmi, tmds_clock, true, has_hdmi_sink); @@ -2011,15 +2009,16 @@ intel_hdmi_mode_clock_valid(struct drm_connector *connector, int clock, } static enum drm_mode_status -intel_hdmi_mode_valid(struct drm_connector *connector, +intel_hdmi_mode_valid(struct drm_connector *_connector, const struct drm_display_mode *mode) { - struct intel_display *display = to_intel_display(connector->dev); - struct intel_hdmi *hdmi = intel_attached_hdmi(to_intel_connector(connector)); + struct intel_connector *connector = to_intel_connector(_connector); + struct intel_display *display = to_intel_display(connector); + struct intel_hdmi *hdmi = intel_attached_hdmi(connector); enum drm_mode_status status; int clock = mode->clock; - int max_dotclk = to_i915(connector->dev)->display.cdclk.max_dotclk_freq; - bool has_hdmi_sink = intel_has_hdmi_sink(hdmi, connector->state); + int max_dotclk = display->cdclk.max_dotclk_freq; + bool has_hdmi_sink = intel_has_hdmi_sink(hdmi, connector->base.state); bool ycbcr_420_only; enum intel_output_format sink_format; @@ -2048,22 +2047,23 @@ intel_hdmi_mode_valid(struct drm_connector *connector, if (clock > 600000) return MODE_CLOCK_HIGH; - ycbcr_420_only = drm_mode_is_420_only(&connector->display_info, mode); + ycbcr_420_only = drm_mode_is_420_only(&connector->base.display_info, mode); if (ycbcr_420_only) sink_format = INTEL_OUTPUT_FORMAT_YCBCR420; else sink_format = INTEL_OUTPUT_FORMAT_RGB; - status = intel_hdmi_mode_clock_valid(connector, clock, has_hdmi_sink, sink_format); + status = intel_hdmi_mode_clock_valid(&connector->base, clock, has_hdmi_sink, sink_format); if (status != MODE_OK) { if (ycbcr_420_only || - !connector->ycbcr_420_allowed || - !drm_mode_is_420_also(&connector->display_info, mode)) + !connector->base.ycbcr_420_allowed || + !drm_mode_is_420_also(&connector->base.display_info, mode)) return status; sink_format = INTEL_OUTPUT_FORMAT_YCBCR420; - status = intel_hdmi_mode_clock_valid(connector, clock, has_hdmi_sink, sink_format); + status = intel_hdmi_mode_clock_valid(&connector->base, clock, has_hdmi_sink, + sink_format); if (status != MODE_OK) return status; } @@ -2074,16 +2074,16 @@ intel_hdmi_mode_valid(struct drm_connector *connector, bool intel_hdmi_bpc_possible(const struct intel_crtc_state 
*crtc_state, int bpc, bool has_hdmi_sink) { - struct drm_atomic_state *state = crtc_state->uapi.state; - struct drm_connector_state *connector_state; - struct drm_connector *connector; + struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state); + struct intel_digital_connector_state *connector_state; + struct intel_connector *connector; int i; - for_each_new_connector_in_state(state, connector, connector_state, i) { - if (connector_state->crtc != crtc_state->uapi.crtc) + for_each_new_intel_connector_in_state(state, connector, connector_state, i) { + if (connector_state->base.crtc != crtc_state->uapi.crtc) continue; - if (!intel_hdmi_sink_bpc_possible(connector, bpc, has_hdmi_sink, + if (!intel_hdmi_sink_bpc_possible(&connector->base, bpc, has_hdmi_sink, crtc_state->sink_format)) return false; } @@ -2211,7 +2211,7 @@ static bool intel_hdmi_has_audio(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct drm_connector *connector = conn_state->connector; + struct intel_connector *connector = to_intel_connector(conn_state->connector); const struct intel_digital_connector_state *intel_conn_state = to_intel_digital_connector_state(conn_state); @@ -2219,7 +2219,7 @@ static bool intel_hdmi_has_audio(struct intel_encoder *encoder, return false; if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO) - return connector->display_info.has_audio; + return connector->base.display_info.has_audio; else return intel_conn_state->force_audio == HDMI_AUDIO_ON; } @@ -2323,14 +2323,14 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, { struct intel_display *display = to_intel_display(encoder); struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; - struct drm_connector *connector = conn_state->connector; - struct drm_scdc *scdc = &connector->display_info.hdmi.scdc; + struct intel_connector *connector = to_intel_connector(conn_state->connector); + struct drm_scdc *scdc = &connector->base.display_info.hdmi.scdc; int ret; if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) return -EINVAL; - if (!connector->interlace_allowed && + if (!connector->base.interlace_allowed && adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) return -EINVAL; @@ -2425,25 +2425,26 @@ void intel_hdmi_encoder_shutdown(struct intel_encoder *encoder) } static void -intel_hdmi_unset_edid(struct drm_connector *connector) +intel_hdmi_unset_edid(struct drm_connector *_connector) { - struct intel_hdmi *intel_hdmi = intel_attached_hdmi(to_intel_connector(connector)); + struct intel_connector *connector = to_intel_connector(_connector); + struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); intel_hdmi->dp_dual_mode.type = DRM_DP_DUAL_MODE_NONE; intel_hdmi->dp_dual_mode.max_tmds_clock = 0; - drm_edid_free(to_intel_connector(connector)->detect_edid); - to_intel_connector(connector)->detect_edid = NULL; + drm_edid_free(connector->detect_edid); + connector->detect_edid = NULL; } static void -intel_hdmi_dp_dual_mode_detect(struct drm_connector *connector) +intel_hdmi_dp_dual_mode_detect(struct drm_connector *_connector) { - struct intel_display *display = to_intel_display(connector->dev); - struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_hdmi *hdmi = intel_attached_hdmi(to_intel_connector(connector)); + struct intel_connector *connector = to_intel_connector(_connector); + struct intel_display *display = to_intel_display(connector); + struct intel_hdmi *hdmi = intel_attached_hdmi(connector); 
struct intel_encoder *encoder = &hdmi_to_dig_port(hdmi)->base; - struct i2c_adapter *ddc = connector->ddc; + struct i2c_adapter *ddc = connector->base.ddc; enum drm_dp_dual_mode_type type; type = drm_dp_dual_mode_detect(display->drm, ddc); @@ -2458,7 +2459,7 @@ intel_hdmi_dp_dual_mode_detect(struct drm_connector *connector) * if the port is a dual mode capable DP port. */ if (type == DRM_DP_DUAL_MODE_UNKNOWN) { - if (!connector->force && + if (!connector->base.force && intel_bios_encoder_supports_dp_dual_mode(encoder->devdata)) { drm_dbg_kms(display->drm, "Assuming DP dual mode adaptor presence based on VBT\n"); @@ -2481,7 +2482,7 @@ intel_hdmi_dp_dual_mode_detect(struct drm_connector *connector) hdmi->dp_dual_mode.max_tmds_clock); /* Older VBTs are often buggy and can't be trusted :( Play it safe. */ - if ((DISPLAY_VER(display) >= 8 || IS_HASWELL(dev_priv)) && + if ((DISPLAY_VER(display) >= 8 || display->platform.haswell) && !intel_bios_encoder_supports_dp_dual_mode(encoder->devdata)) { drm_dbg_kms(display->drm, "Ignoring DP dual mode adaptor max TMDS clock for native HDMI port\n"); @@ -2490,34 +2491,35 @@ intel_hdmi_dp_dual_mode_detect(struct drm_connector *connector) } static bool -intel_hdmi_set_edid(struct drm_connector *connector) +intel_hdmi_set_edid(struct drm_connector *_connector) { - struct intel_display *display = to_intel_display(connector->dev); - struct intel_hdmi *intel_hdmi = intel_attached_hdmi(to_intel_connector(connector)); - struct i2c_adapter *ddc = connector->ddc; + struct intel_connector *connector = to_intel_connector(_connector); + struct intel_display *display = to_intel_display(connector); + struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); + struct i2c_adapter *ddc = connector->base.ddc; intel_wakeref_t wakeref; const struct drm_edid *drm_edid; bool connected = false; wakeref = intel_display_power_get(display, POWER_DOMAIN_GMBUS); - drm_edid = drm_edid_read_ddc(connector, ddc); + drm_edid = drm_edid_read_ddc(&connector->base, ddc); if (!drm_edid && !intel_gmbus_is_forced_bit(ddc)) { drm_dbg_kms(display->drm, "HDMI GMBUS EDID read failed, retry using GPIO bit-banging\n"); intel_gmbus_force_bit(ddc, true); - drm_edid = drm_edid_read_ddc(connector, ddc); + drm_edid = drm_edid_read_ddc(&connector->base, ddc); intel_gmbus_force_bit(ddc, false); } /* Below we depend on display info having been updated */ - drm_edid_connector_update(connector, drm_edid); + drm_edid_connector_update(&connector->base, drm_edid); - to_intel_connector(connector)->detect_edid = drm_edid; + connector->detect_edid = drm_edid; if (drm_edid_is_digital(drm_edid)) { - intel_hdmi_dp_dual_mode_detect(connector); + intel_hdmi_dp_dual_mode_detect(&connector->base); connected = true; } @@ -2525,28 +2527,29 @@ intel_hdmi_set_edid(struct drm_connector *connector) intel_display_power_put(display, POWER_DOMAIN_GMBUS, wakeref); cec_notifier_set_phys_addr(intel_hdmi->cec_notifier, - connector->display_info.source_physical_address); + connector->base.display_info.source_physical_address); return connected; } static enum drm_connector_status -intel_hdmi_detect(struct drm_connector *connector, bool force) +intel_hdmi_detect(struct drm_connector *_connector, bool force) { - struct intel_display *display = to_intel_display(connector->dev); + struct intel_connector *connector = to_intel_connector(_connector); + struct intel_display *display = to_intel_display(connector); enum drm_connector_status status = connector_status_disconnected; - struct intel_hdmi *intel_hdmi = 
intel_attached_hdmi(to_intel_connector(connector)); + struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); struct intel_encoder *encoder = &hdmi_to_dig_port(intel_hdmi)->base; intel_wakeref_t wakeref; drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s]\n", - connector->base.id, connector->name); + connector->base.base.id, connector->base.name); if (!intel_display_device_enabled(display)) return connector_status_disconnected; if (!intel_display_driver_check_access(display)) - return connector->status; + return connector->base.status; wakeref = intel_display_power_get(display, POWER_DOMAIN_GMBUS); @@ -2554,9 +2557,9 @@ intel_hdmi_detect(struct drm_connector *connector, bool force) !intel_digital_port_connected(encoder)) goto out; - intel_hdmi_unset_edid(connector); + intel_hdmi_unset_edid(&connector->base); - if (intel_hdmi_set_edid(connector)) + if (intel_hdmi_set_edid(&connector->base)) status = connector_status_connected; out: @@ -2569,49 +2572,54 @@ out: } static void -intel_hdmi_force(struct drm_connector *connector) +intel_hdmi_force(struct drm_connector *_connector) { - struct intel_display *display = to_intel_display(connector->dev); + struct intel_connector *connector = to_intel_connector(_connector); + struct intel_display *display = to_intel_display(connector); drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s]\n", - connector->base.id, connector->name); + connector->base.base.id, connector->base.name); if (!intel_display_driver_check_access(display)) return; - intel_hdmi_unset_edid(connector); + intel_hdmi_unset_edid(&connector->base); - if (connector->status != connector_status_connected) + if (connector->base.status != connector_status_connected) return; - intel_hdmi_set_edid(connector); + intel_hdmi_set_edid(&connector->base); } -static int intel_hdmi_get_modes(struct drm_connector *connector) +static int intel_hdmi_get_modes(struct drm_connector *_connector) { + struct intel_connector *connector = to_intel_connector(_connector); + /* drm_edid_connector_update() done in ->detect() or ->force() */ - return drm_edid_connector_add_modes(connector); + return drm_edid_connector_add_modes(&connector->base); } static int -intel_hdmi_connector_register(struct drm_connector *connector) +intel_hdmi_connector_register(struct drm_connector *_connector) { + struct intel_connector *connector = to_intel_connector(_connector); int ret; - ret = intel_connector_register(connector); + ret = intel_connector_register(&connector->base); if (ret) return ret; return ret; } -static void intel_hdmi_connector_unregister(struct drm_connector *connector) +static void intel_hdmi_connector_unregister(struct drm_connector *_connector) { - struct cec_notifier *n = intel_attached_hdmi(to_intel_connector(connector))->cec_notifier; + struct intel_connector *connector = to_intel_connector(_connector); + struct cec_notifier *n = intel_attached_hdmi(connector)->cec_notifier; cec_notifier_conn_unregister(n); - intel_connector_unregister(connector); + intel_connector_unregister(&connector->base); } static const struct drm_connector_funcs intel_hdmi_connector_funcs = { @@ -2627,15 +2635,16 @@ static const struct drm_connector_funcs intel_hdmi_connector_funcs = { .atomic_duplicate_state = intel_digital_connector_duplicate_state, }; -static int intel_hdmi_connector_atomic_check(struct drm_connector *connector, +static int intel_hdmi_connector_atomic_check(struct drm_connector *_connector, struct drm_atomic_state *state) { - struct intel_display *display = to_intel_display(connector->dev); + struct intel_connector 
*connector = to_intel_connector(_connector); + struct intel_display *display = to_intel_display(connector); if (HAS_DDI(display)) - return intel_digital_connector_atomic_check(connector, state); + return intel_digital_connector_atomic_check(&connector->base, state); else - return g4x_hdmi_connector_atomic_check(connector, state); + return g4x_hdmi_connector_atomic_check(&connector->base, state); } static const struct drm_connector_helper_funcs intel_hdmi_connector_helper_funcs = { @@ -2645,22 +2654,23 @@ static const struct drm_connector_helper_funcs intel_hdmi_connector_helper_funcs }; static void -intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *connector) +intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *_connector) { + struct intel_connector *connector = to_intel_connector(_connector); struct intel_display *display = to_intel_display(intel_hdmi); - intel_attach_force_audio_property(connector); - intel_attach_broadcast_rgb_property(connector); - intel_attach_aspect_ratio_property(connector); + intel_attach_force_audio_property(&connector->base); + intel_attach_broadcast_rgb_property(&connector->base); + intel_attach_aspect_ratio_property(&connector->base); - intel_attach_hdmi_colorspace_property(connector); - drm_connector_attach_content_type_property(connector); + intel_attach_hdmi_colorspace_property(&connector->base); + drm_connector_attach_content_type_property(&connector->base); if (DISPLAY_VER(display) >= 10) - drm_connector_attach_hdr_output_metadata_property(connector); + drm_connector_attach_hdr_output_metadata_property(&connector->base); if (!HAS_GMCH(display)) - drm_connector_attach_max_bpc_property(connector, 8, 12); + drm_connector_attach_max_bpc_property(&connector->base, 8, 12); } /* @@ -2682,25 +2692,26 @@ intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *c * True on success, false on failure. */ bool intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder, - struct drm_connector *connector, + struct drm_connector *_connector, bool high_tmds_clock_ratio, bool scrambling) { + struct intel_connector *connector = to_intel_connector(_connector); struct intel_display *display = to_intel_display(encoder); struct drm_scrambling *sink_scrambling = - &connector->display_info.hdmi.scdc.scrambling; + &connector->base.display_info.hdmi.scdc.scrambling; if (!sink_scrambling->supported) return true; drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s] scrambling=%s, TMDS bit clock ratio=1/%d\n", - connector->base.id, connector->name, + connector->base.base.id, connector->base.name, str_yes_no(scrambling), high_tmds_clock_ratio ? 
40 : 10); /* Set TMDS bit clock ratio to 1/40 or 1/10, and enable/disable scrambling */ - return drm_scdc_set_high_tmds_clock_ratio(connector, high_tmds_clock_ratio) && - drm_scdc_set_scrambling(connector, scrambling); + return drm_scdc_set_high_tmds_clock_ratio(&connector->base, high_tmds_clock_ratio) && + drm_scdc_set_scrambling(&connector->base, scrambling); } static u8 chv_encoder_to_ddc_pin(struct intel_encoder *encoder) @@ -2811,7 +2822,7 @@ static u8 mcc_encoder_to_ddc_pin(struct intel_encoder *encoder) static u8 rkl_encoder_to_ddc_pin(struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); enum phy phy = intel_encoder_to_phy(encoder); WARN_ON(encoder->port == PORT_C); @@ -2822,7 +2833,7 @@ static u8 rkl_encoder_to_ddc_pin(struct intel_encoder *encoder) * combo outputs. With CMP, the traditional DDI A-D pins are used for * all outputs. */ - if (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP && phy >= PHY_C) + if (INTEL_PCH_TYPE(display) >= PCH_TGP && phy >= PHY_C) return GMBUS_PIN_9_TC1_ICP + phy - PHY_C; return GMBUS_PIN_1_BXT + phy; @@ -2831,7 +2842,6 @@ static u8 rkl_encoder_to_ddc_pin(struct intel_encoder *encoder) static u8 gen9bc_tgp_encoder_to_ddc_pin(struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_encoder_to_phy(encoder); drm_WARN_ON(display->drm, encoder->port == PORT_A); @@ -2842,7 +2852,7 @@ static u8 gen9bc_tgp_encoder_to_ddc_pin(struct intel_encoder *encoder) * combo outputs. With CMP, the traditional DDI A-D pins are used for * all outputs. */ - if (INTEL_PCH_TYPE(i915) >= PCH_TGP && phy >= PHY_C) + if (INTEL_PCH_TYPE(display) >= PCH_TGP && phy >= PHY_C) return GMBUS_PIN_9_TC1_ICP + phy - PHY_C; return GMBUS_PIN_1_BXT + phy; @@ -2895,27 +2905,26 @@ static u8 g4x_encoder_to_ddc_pin(struct intel_encoder *encoder) static u8 intel_hdmi_default_ddc_pin(struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u8 ddc_pin; - if (IS_ALDERLAKE_S(dev_priv)) + if (display->platform.alderlake_s) ddc_pin = adls_encoder_to_ddc_pin(encoder); - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_DG1) + else if (INTEL_PCH_TYPE(display) >= PCH_DG1) ddc_pin = dg1_encoder_to_ddc_pin(encoder); - else if (IS_ROCKETLAKE(dev_priv)) + else if (display->platform.rocketlake) ddc_pin = rkl_encoder_to_ddc_pin(encoder); - else if (DISPLAY_VER(display) == 9 && HAS_PCH_TGP(dev_priv)) + else if (DISPLAY_VER(display) == 9 && HAS_PCH_TGP(display)) ddc_pin = gen9bc_tgp_encoder_to_ddc_pin(encoder); - else if ((IS_JASPERLAKE(dev_priv) || IS_ELKHARTLAKE(dev_priv)) && - HAS_PCH_TGP(dev_priv)) + else if ((display->platform.jasperlake || display->platform.elkhartlake) && + HAS_PCH_TGP(display)) ddc_pin = mcc_encoder_to_ddc_pin(encoder); - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + else if (INTEL_PCH_TYPE(display) >= PCH_ICP) ddc_pin = icl_encoder_to_ddc_pin(encoder); - else if (HAS_PCH_CNP(dev_priv)) + else if (HAS_PCH_CNP(display)) ddc_pin = cnp_encoder_to_ddc_pin(encoder); - else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) + else if (display->platform.geminilake || display->platform.broxton) ddc_pin = bxt_encoder_to_ddc_pin(encoder); - else if (IS_CHERRYVIEW(dev_priv)) + else if (display->platform.cherryview) ddc_pin = chv_encoder_to_ddc_pin(encoder); else ddc_pin = 
g4x_encoder_to_ddc_pin(encoder); @@ -2989,15 +2998,13 @@ static u8 intel_hdmi_ddc_pin(struct intel_encoder *encoder) void intel_infoframe_init(struct intel_digital_port *dig_port) { struct intel_display *display = to_intel_display(dig_port); - struct drm_i915_private *dev_priv = - to_i915(dig_port->base.base.dev); - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + if (display->platform.valleyview || display->platform.cherryview) { dig_port->write_infoframe = vlv_write_infoframe; dig_port->read_infoframe = vlv_read_infoframe; dig_port->set_infoframes = vlv_set_infoframes; dig_port->infoframes_enabled = vlv_infoframes_enabled; - } else if (IS_G4X(dev_priv)) { + } else if (display->platform.g4x) { dig_port->write_infoframe = g4x_write_infoframe; dig_port->read_infoframe = g4x_read_infoframe; dig_port->set_infoframes = g4x_set_infoframes; @@ -3014,7 +3021,7 @@ void intel_infoframe_init(struct intel_digital_port *dig_port) dig_port->set_infoframes = hsw_set_infoframes; dig_port->infoframes_enabled = hsw_infoframes_enabled; } - } else if (HAS_PCH_IBX(dev_priv)) { + } else if (HAS_PCH_IBX(display)) { dig_port->write_infoframe = ibx_write_infoframe; dig_port->read_infoframe = ibx_read_infoframe; dig_port->set_infoframes = ibx_set_infoframes; diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c index 6885e5a09079..fc5d8928c37e 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -183,9 +183,7 @@ static bool intel_hpd_irq_storm_detect(struct intel_display *display, static bool detection_work_enabled(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - - lockdep_assert_held(&i915->irq_lock); + lockdep_assert_held(&display->irq.lock); return display->hotplug.detection_work_enabled; } @@ -195,7 +193,7 @@ mod_delayed_detection_work(struct intel_display *display, struct delayed_work *w { struct drm_i915_private *i915 = to_i915(display->drm); - lockdep_assert_held(&i915->irq_lock); + lockdep_assert_held(&display->irq.lock); if (!detection_work_enabled(display)) return false; @@ -208,7 +206,7 @@ queue_delayed_detection_work(struct intel_display *display, struct delayed_work { struct drm_i915_private *i915 = to_i915(display->drm); - lockdep_assert_held(&i915->irq_lock); + lockdep_assert_held(&display->irq.lock); if (!detection_work_enabled(display)) return false; @@ -221,7 +219,7 @@ queue_detection_work(struct intel_display *display, struct work_struct *work) { struct drm_i915_private *i915 = to_i915(display->drm); - lockdep_assert_held(&i915->irq_lock); + lockdep_assert_held(&display->irq.lock); if (!detection_work_enabled(display)) return false; @@ -232,12 +230,11 @@ queue_detection_work(struct intel_display *display, struct work_struct *work) static void intel_hpd_irq_storm_switch_to_polling(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct drm_connector_list_iter conn_iter; struct intel_connector *connector; bool hpd_disabled = false; - lockdep_assert_held(&dev_priv->irq_lock); + lockdep_assert_held(&display->irq.lock); drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { @@ -276,7 +273,6 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) { struct intel_display *display = container_of(work, typeof(*display), hotplug.reenable_work.work); - struct drm_i915_private *dev_priv = to_i915(display->drm); struct 
drm_connector_list_iter conn_iter; struct intel_connector *connector; struct ref_tracker *wakeref; @@ -284,7 +280,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) wakeref = intel_display_rpm_get(display); - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { @@ -308,7 +304,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) intel_hpd_irq_setup(display); - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); intel_display_rpm_put(display, wakeref); } @@ -376,9 +372,7 @@ static bool hpd_pin_has_pulse(struct intel_display *display, enum hpd_pin pin) static bool hpd_pin_is_blocked(struct intel_display *display, enum hpd_pin pin) { - struct drm_i915_private *i915 = to_i915(display->drm); - - lockdep_assert_held(&i915->irq_lock); + lockdep_assert_held(&display->irq.lock); return display->hotplug.stats[pin].blocked_count; } @@ -400,14 +394,13 @@ static void i915_digport_work_func(struct work_struct *work) { struct intel_display *display = container_of(work, struct intel_display, hotplug.dig_port_work); - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_hotplug *hotplug = &display->hotplug; u32 long_hpd_pin_mask, short_hpd_pin_mask; struct intel_encoder *encoder; u32 blocked_hpd_pin_mask; u32 old_bits = 0; - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); blocked_hpd_pin_mask = get_blocked_hpd_pin_mask(display); long_hpd_pin_mask = hotplug->long_hpd_pin_mask & ~blocked_hpd_pin_mask; @@ -415,7 +408,7 @@ static void i915_digport_work_func(struct work_struct *work) short_hpd_pin_mask = hotplug->short_hpd_pin_mask & ~blocked_hpd_pin_mask; hotplug->short_hpd_pin_mask &= ~short_hpd_pin_mask; - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); for_each_intel_encoder(display->drm, encoder) { struct intel_digital_port *dig_port; @@ -442,11 +435,11 @@ static void i915_digport_work_func(struct work_struct *work) } if (old_bits) { - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); display->hotplug.event_bits |= old_bits; queue_delayed_detection_work(display, &display->hotplug.hotplug_work, 0); - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); } } @@ -460,17 +453,16 @@ static void i915_digport_work_func(struct work_struct *work) void intel_hpd_trigger_irq(struct intel_digital_port *dig_port) { struct intel_display *display = to_intel_display(dig_port); - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_hotplug *hotplug = &display->hotplug; struct intel_encoder *encoder = &dig_port->base; - spin_lock_irq(&i915->irq_lock); + spin_lock_irq(&display->irq.lock); hotplug->short_hpd_pin_mask |= BIT(encoder->hpd_pin); if (!hpd_pin_is_blocked(display, encoder->hpd_pin)) queue_work(hotplug->dp_wq, &hotplug->dig_port_work); - spin_unlock_irq(&i915->irq_lock); + spin_unlock_irq(&display->irq.lock); } /* @@ -480,7 +472,6 @@ static void i915_hotplug_work_func(struct work_struct *work) { struct intel_display *display = container_of(work, struct intel_display, hotplug.hotplug_work.work); - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_hotplug *hotplug = &display->hotplug; struct drm_connector_list_iter conn_iter; struct intel_connector *connector; @@ -494,7 +485,7 @@ static void i915_hotplug_work_func(struct work_struct *work) 
mutex_lock(&display->drm->mode_config.mutex); drm_dbg_kms(display->drm, "running encoder hotplug functions\n"); - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); blocked_hpd_pin_mask = get_blocked_hpd_pin_mask(display); hpd_event_bits = hotplug->event_bits & ~blocked_hpd_pin_mask; @@ -505,7 +496,7 @@ static void i915_hotplug_work_func(struct work_struct *work) /* Enable polling for connectors which had HPD IRQ storms */ intel_hpd_irq_storm_switch_to_polling(display); - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); /* Skip calling encode hotplug handlers if ignore long HPD set*/ if (display->hotplug.ignore_long_hpd) { @@ -569,13 +560,13 @@ static void i915_hotplug_work_func(struct work_struct *work) /* Remove shared HPD pins that have changed */ retry &= ~changed; if (retry) { - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); display->hotplug.retry_bits |= retry; mod_delayed_detection_work(display, &display->hotplug.hotplug_work, msecs_to_jiffies(HPD_RETRY_DELAY)); - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); } } @@ -599,7 +590,6 @@ static void i915_hotplug_work_func(struct work_struct *work) void intel_hpd_irq_handler(struct intel_display *display, u32 pin_mask, u32 long_mask) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_encoder *encoder; bool storm_detected = false; bool queue_dig = false, queue_hp = false; @@ -610,7 +600,7 @@ void intel_hpd_irq_handler(struct intel_display *display, if (!pin_mask) return; - spin_lock(&dev_priv->irq_lock); + spin_lock(&display->irq.lock); /* * Determine whether ->hpd_pulse() exists for each pin, and @@ -711,7 +701,7 @@ void intel_hpd_irq_handler(struct intel_display *display, queue_delayed_detection_work(display, &display->hotplug.hotplug_work, 0); - spin_unlock(&dev_priv->irq_lock); + spin_unlock(&display->irq.lock); } /** @@ -730,7 +720,6 @@ void intel_hpd_irq_handler(struct intel_display *display, */ void intel_hpd_init(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); int i; if (!HAS_DISPLAY(display)) @@ -745,9 +734,9 @@ void intel_hpd_init(struct intel_display *display) * Interrupt setup is already guaranteed to be single-threaded, this is * just to make the assert_spin_locked checks happy. 
*/ - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); intel_hpd_irq_setup(display); - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); } static void i915_hpd_poll_detect_connectors(struct intel_display *display) @@ -797,7 +786,6 @@ static void i915_hpd_poll_init_work(struct work_struct *work) { struct intel_display *display = container_of(work, typeof(*display), hotplug.poll_init_work); - struct drm_i915_private *dev_priv = to_i915(display->drm); struct drm_connector_list_iter conn_iter; struct intel_connector *connector; intel_wakeref_t wakeref; @@ -820,7 +808,7 @@ static void i915_hpd_poll_init_work(struct work_struct *work) cancel_work(&display->hotplug.poll_init_work); } - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { @@ -841,7 +829,7 @@ static void i915_hpd_poll_init_work(struct work_struct *work) } drm_connector_list_iter_end(&conn_iter); - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); if (enabled) drm_kms_helper_poll_reschedule(display->drm); @@ -879,8 +867,6 @@ static void i915_hpd_poll_init_work(struct work_struct *work) */ void intel_hpd_poll_enable(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - if (!HAS_DISPLAY(display) || !intel_display_device_enabled(display)) return; @@ -892,10 +878,10 @@ void intel_hpd_poll_enable(struct intel_display *display) * As well, there's no issue if we race here since we always reschedule * this worker anyway */ - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); queue_detection_work(display, &display->hotplug.poll_init_work); - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); } /** @@ -919,17 +905,15 @@ void intel_hpd_poll_enable(struct intel_display *display) */ void intel_hpd_poll_disable(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - if (!HAS_DISPLAY(display)) return; WRITE_ONCE(display->hotplug.poll_enabled, false); - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); queue_detection_work(display, &display->hotplug.poll_init_work); - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); } void intel_hpd_poll_fini(struct intel_display *display) @@ -981,12 +965,10 @@ static bool cancel_all_detection_work(struct intel_display *display) void intel_hpd_cancel_work(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - if (!HAS_DISPLAY(display)) return; - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); drm_WARN_ON(display->drm, get_blocked_hpd_pin_mask(display)); @@ -995,7 +977,7 @@ void intel_hpd_cancel_work(struct intel_display *display) display->hotplug.event_bits = 0; display->hotplug.retry_bits = 0; - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); cancel_work_sync(&display->hotplug.dig_port_work); @@ -1009,13 +991,12 @@ void intel_hpd_cancel_work(struct intel_display *display) static void queue_work_for_missed_irqs(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_hotplug *hotplug = &display->hotplug; bool queue_hp_work = false; u32 blocked_hpd_pin_mask; enum hpd_pin pin; - lockdep_assert_held(&i915->irq_lock); + lockdep_assert_held(&display->irq.lock); blocked_hpd_pin_mask = 
get_blocked_hpd_pin_mask(display); if ((hotplug->event_bits | hotplug->retry_bits) & ~blocked_hpd_pin_mask) @@ -1043,10 +1024,9 @@ static void queue_work_for_missed_irqs(struct intel_display *display) static bool block_hpd_pin(struct intel_display *display, enum hpd_pin pin) { - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_hotplug *hotplug = &display->hotplug; - lockdep_assert_held(&i915->irq_lock); + lockdep_assert_held(&display->irq.lock); hotplug->stats[pin].blocked_count++; @@ -1055,10 +1035,9 @@ static bool block_hpd_pin(struct intel_display *display, enum hpd_pin pin) static bool unblock_hpd_pin(struct intel_display *display, enum hpd_pin pin) { - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_hotplug *hotplug = &display->hotplug; - lockdep_assert_held(&i915->irq_lock); + lockdep_assert_held(&display->irq.lock); if (drm_WARN_ON(display->drm, hotplug->stats[pin].blocked_count == 0)) return true; @@ -1095,19 +1074,18 @@ static bool unblock_hpd_pin(struct intel_display *display, enum hpd_pin pin) void intel_hpd_block(struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_hotplug *hotplug = &display->hotplug; bool do_flush = false; if (encoder->hpd_pin == HPD_NONE) return; - spin_lock_irq(&i915->irq_lock); + spin_lock_irq(&display->irq.lock); if (block_hpd_pin(display, encoder->hpd_pin)) do_flush = true; - spin_unlock_irq(&i915->irq_lock); + spin_unlock_irq(&display->irq.lock); if (do_flush && hpd_pin_has_pulse(display, encoder->hpd_pin)) flush_work(&hotplug->dig_port_work); @@ -1125,17 +1103,16 @@ void intel_hpd_block(struct intel_encoder *encoder) void intel_hpd_unblock(struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *i915 = to_i915(display->drm); if (encoder->hpd_pin == HPD_NONE) return; - spin_lock_irq(&i915->irq_lock); + spin_lock_irq(&display->irq.lock); if (unblock_hpd_pin(display, encoder->hpd_pin)) queue_work_for_missed_irqs(display); - spin_unlock_irq(&i915->irq_lock); + spin_unlock_irq(&display->irq.lock); } /** @@ -1149,14 +1126,13 @@ void intel_hpd_unblock(struct intel_encoder *encoder) void intel_hpd_clear_and_unblock(struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_hotplug *hotplug = &display->hotplug; enum hpd_pin pin = encoder->hpd_pin; if (pin == HPD_NONE) return; - spin_lock_irq(&i915->irq_lock); + spin_lock_irq(&display->irq.lock); if (unblock_hpd_pin(display, pin)) { hotplug->event_bits &= ~BIT(pin); @@ -1165,39 +1141,34 @@ void intel_hpd_clear_and_unblock(struct intel_encoder *encoder) hotplug->long_hpd_pin_mask &= ~BIT(pin); } - spin_unlock_irq(&i915->irq_lock); + spin_unlock_irq(&display->irq.lock); } void intel_hpd_enable_detection_work(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - - spin_lock_irq(&i915->irq_lock); + spin_lock_irq(&display->irq.lock); display->hotplug.detection_work_enabled = true; queue_work_for_missed_irqs(display); - spin_unlock_irq(&i915->irq_lock); + spin_unlock_irq(&display->irq.lock); } void intel_hpd_disable_detection_work(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - - spin_lock_irq(&i915->irq_lock); + spin_lock_irq(&display->irq.lock); display->hotplug.detection_work_enabled = false; - spin_unlock_irq(&i915->irq_lock); 
+ spin_unlock_irq(&display->irq.lock); cancel_all_detection_work(display); } bool intel_hpd_schedule_detection(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); unsigned long flags; bool ret; - spin_lock_irqsave(&i915->irq_lock, flags); + spin_lock_irqsave(&display->irq.lock, flags); ret = queue_delayed_detection_work(display, &display->hotplug.hotplug_work, 0); - spin_unlock_irqrestore(&i915->irq_lock, flags); + spin_unlock_irqrestore(&display->irq.lock, flags); return ret; } @@ -1228,7 +1199,6 @@ static ssize_t i915_hpd_storm_ctl_write(struct file *file, { struct seq_file *m = file->private_data; struct intel_display *display = m->private; - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_hotplug *hotplug = &display->hotplug; unsigned int new_threshold; int i; @@ -1260,12 +1230,12 @@ static ssize_t i915_hpd_storm_ctl_write(struct file *file, else drm_dbg_kms(display->drm, "Disabling HPD storm detection\n"); - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); hotplug->hpd_storm_threshold = new_threshold; /* Reset the HPD storm stats so we don't accidentally trigger a storm */ for_each_hpd_pin(i) hotplug->stats[i].count = 0; - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); /* Re-enable hpd immediately if we were in an irq storm */ flush_delayed_work(&display->hotplug.reenable_work); @@ -1310,7 +1280,6 @@ static ssize_t i915_hpd_short_storm_ctl_write(struct file *file, { struct seq_file *m = file->private_data; struct intel_display *display = m->private; - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_hotplug *hotplug = &display->hotplug; char *newline; char tmp[16]; @@ -1339,12 +1308,12 @@ static ssize_t i915_hpd_short_storm_ctl_write(struct file *file, drm_dbg_kms(display->drm, "%sabling HPD short storm detection\n", new_state ? 
"En" : "Dis"); - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); hotplug->hpd_short_storm_enabled = new_state; /* Reset the HPD storm stats so we don't accidentally trigger a storm */ for_each_hpd_pin(i) hotplug->stats[i].count = 0; - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); /* Re-enable hpd immediately if we were in an irq storm */ flush_delayed_work(&display->hotplug.reenable_work); diff --git a/drivers/gpu/drm/i915/display/intel_hotplug_irq.c b/drivers/gpu/drm/i915/display/intel_hotplug_irq.c index 2463e61e7802..c024b42369c8 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug_irq.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug_irq.c @@ -3,8 +3,10 @@ * Copyright © 2023 Intel Corporation */ -#include "i915_drv.h" +#include <drm/drm_print.h> + #include "i915_reg.h" +#include "i915_utils.h" #include "intel_de.h" #include "intel_display_irq.h" #include "intel_display_types.h" @@ -133,7 +135,6 @@ static const u32 hpd_mtp[HPD_NUM_PINS] = { static void intel_hpd_init_pins(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_hotplug *hpd = &display->hotplug; if (HAS_GMCH(display)) { @@ -160,33 +161,31 @@ static void intel_hpd_init_pins(struct intel_display *display) else hpd->hpd = hpd_ilk; - if ((INTEL_PCH_TYPE(dev_priv) < PCH_DG1) && - (!HAS_PCH_SPLIT(dev_priv) || HAS_PCH_NOP(dev_priv))) + if ((INTEL_PCH_TYPE(display) < PCH_DG1) && + (!HAS_PCH_SPLIT(display) || HAS_PCH_NOP(display))) return; - if (INTEL_PCH_TYPE(dev_priv) >= PCH_MTL) + if (INTEL_PCH_TYPE(display) >= PCH_MTL) hpd->pch_hpd = hpd_mtp; - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_DG1) + else if (INTEL_PCH_TYPE(display) >= PCH_DG1) hpd->pch_hpd = hpd_sde_dg1; - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + else if (INTEL_PCH_TYPE(display) >= PCH_ICP) hpd->pch_hpd = hpd_icp; - else if (HAS_PCH_CNP(dev_priv) || HAS_PCH_SPT(dev_priv)) + else if (HAS_PCH_CNP(display) || HAS_PCH_SPT(display)) hpd->pch_hpd = hpd_spt; - else if (HAS_PCH_LPT(dev_priv) || HAS_PCH_CPT(dev_priv)) + else if (HAS_PCH_LPT(display) || HAS_PCH_CPT(display)) hpd->pch_hpd = hpd_cpt; - else if (HAS_PCH_IBX(dev_priv)) + else if (HAS_PCH_IBX(display)) hpd->pch_hpd = hpd_ibx; else - MISSING_CASE(INTEL_PCH_TYPE(dev_priv)); + MISSING_CASE(INTEL_PCH_TYPE(display)); } /* For display hotplug interrupt */ void i915_hotplug_interrupt_update_locked(struct intel_display *display, u32 mask, u32 bits) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - - lockdep_assert_held(&dev_priv->irq_lock); + lockdep_assert_held(&display->irq.lock); drm_WARN_ON(display->drm, bits & ~mask); intel_de_rmw(display, PORT_HOTPLUG_EN(display), mask, bits); @@ -208,11 +207,9 @@ void i915_hotplug_interrupt_update(struct intel_display *display, u32 mask, u32 bits) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); i915_hotplug_interrupt_update_locked(display, mask, bits); - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); } static bool gen11_port_hotplug_long_detect(enum hpd_pin pin, u32 val) @@ -557,7 +554,6 @@ void xelpdp_pica_irq_handler(struct intel_display *display, u32 iir) void icp_irq_handler(struct intel_display *display, u32 pch_iir) { - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 ddi_hotplug_trigger = pch_iir & SDE_DDI_HOTPLUG_MASK_ICP; u32 tc_hotplug_trigger = pch_iir & SDE_TC_HOTPLUG_MASK_ICP; u32 pin_mask = 0, long_mask = 
0; @@ -566,9 +562,9 @@ void icp_irq_handler(struct intel_display *display, u32 pch_iir) u32 dig_hotplug_reg; /* Locking due to DSI native GPIO sequences */ - spin_lock(&dev_priv->irq_lock); + spin_lock(&display->irq.lock); dig_hotplug_reg = intel_de_rmw(display, SHOTPLUG_CTL_DDI, 0, 0); - spin_unlock(&dev_priv->irq_lock); + spin_unlock(&display->irq.lock); intel_get_hpd_pins(display, &pin_mask, &long_mask, ddi_hotplug_trigger, dig_hotplug_reg, @@ -711,7 +707,7 @@ static u32 ibx_hotplug_mask(enum hpd_pin hpd_pin) static u32 ibx_hotplug_enables(struct intel_encoder *encoder) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); switch (encoder->hpd_pin) { case HPD_PORT_A: @@ -719,7 +715,7 @@ static u32 ibx_hotplug_enables(struct intel_encoder *encoder) * When CPU and PCH are on the same package, port A * HPD must be enabled in both north and south. */ - return HAS_PCH_LPT_LP(i915) ? + return HAS_PCH_LPT_LP(display) ? PORTA_HOTPLUG_ENABLE : 0; case HPD_PORT_B: return PORTB_HOTPLUG_ENABLE | @@ -940,18 +936,17 @@ static void gen11_tbt_hpd_enable_detection(struct intel_encoder *encoder) static void gen11_hpd_enable_detection(struct intel_encoder *encoder) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); gen11_tc_hpd_enable_detection(encoder); gen11_tbt_hpd_enable_detection(encoder); - if (INTEL_PCH_TYPE(i915) >= PCH_ICP) + if (INTEL_PCH_TYPE(display) >= PCH_ICP) icp_hpd_enable_detection(encoder); } static void gen11_hpd_irq_setup(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 hotplug_irqs, enabled_irqs; enabled_irqs = intel_hpd_enabled_irqs(display, display->hotplug.hpd); @@ -964,7 +959,7 @@ static void gen11_hpd_irq_setup(struct intel_display *display) gen11_tc_hpd_detection_setup(display); gen11_tbt_hpd_detection_setup(display); - if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + if (INTEL_PCH_TYPE(display) >= PCH_ICP) icp_hpd_irq_setup(display); } @@ -1138,7 +1133,6 @@ static void xelpdp_hpd_enable_detection(struct intel_encoder *encoder) static void xelpdp_hpd_irq_setup(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); u32 hotplug_irqs, enabled_irqs; enabled_irqs = intel_hpd_enabled_irqs(display, display->hotplug.hpd); @@ -1150,9 +1144,9 @@ static void xelpdp_hpd_irq_setup(struct intel_display *display) xelpdp_pica_hpd_detection_setup(display); - if (INTEL_PCH_TYPE(i915) >= PCH_LNL) + if (INTEL_PCH_TYPE(display) >= PCH_LNL) xe2lpd_sde_hpd_irq_setup(display); - else if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + else if (INTEL_PCH_TYPE(display) >= PCH_MTL) mtp_hpd_irq_setup(display); } @@ -1194,10 +1188,8 @@ static u32 spt_hotplug2_enables(struct intel_encoder *encoder) static void spt_hpd_detection_setup(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - /* Display WA #1179 WaHardHangonHotPlug: cnp */ - if (HAS_PCH_CNP(dev_priv)) { + if (HAS_PCH_CNP(display)) { intel_de_rmw(display, SOUTH_CHICKEN1, CHASSIS_CLK_REQ_DURATION_MASK, CHASSIS_CLK_REQ_DURATION(0xf)); } @@ -1215,10 +1207,9 @@ static void spt_hpd_detection_setup(struct intel_display *display) static void spt_hpd_enable_detection(struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); /* Display WA #1179 WaHardHangonHotPlug: cnp */ - if (HAS_PCH_CNP(i915)) { + if 
(HAS_PCH_CNP(display)) { intel_de_rmw(display, SOUTH_CHICKEN1, CHASSIS_CLK_REQ_DURATION_MASK, CHASSIS_CLK_REQ_DURATION(0xf)); @@ -1235,10 +1226,9 @@ static void spt_hpd_enable_detection(struct intel_encoder *encoder) static void spt_hpd_irq_setup(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 hotplug_irqs, enabled_irqs; - if (INTEL_PCH_TYPE(dev_priv) >= PCH_CNP) + if (INTEL_PCH_TYPE(display) >= PCH_CNP) intel_de_write(display, SHPD_FILTER_CNT, SHPD_FILTER_CNT_500_ADJ); enabled_irqs = intel_hpd_enabled_irqs(display, display->hotplug.pch_hpd); @@ -1402,10 +1392,9 @@ static void i915_hpd_enable_detection(struct intel_encoder *encoder) static void i915_hpd_irq_setup(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 hotplug_en; - lockdep_assert_held(&dev_priv->irq_lock); + lockdep_assert_held(&display->irq.lock); /* * Note HDMI and DP share hotplug bits. Enable bits are the same for all @@ -1474,8 +1463,6 @@ void intel_hpd_irq_setup(struct intel_display *display) void intel_hotplug_irq_init(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - intel_hpd_init_pins(display); intel_hpd_init_early(display); @@ -1484,9 +1471,9 @@ void intel_hotplug_irq_init(struct intel_display *display) if (HAS_HOTPLUG(display)) display->funcs.hotplug = &i915_hpd_funcs; } else { - if (HAS_PCH_DG2(i915)) + if (HAS_PCH_DG2(display)) display->funcs.hotplug = &icp_hpd_funcs; - else if (HAS_PCH_DG1(i915)) + else if (HAS_PCH_DG1(display)) display->funcs.hotplug = &dg1_hpd_funcs; else if (DISPLAY_VER(display) >= 14) display->funcs.hotplug = &xelpdp_hpd_funcs; @@ -1494,9 +1481,9 @@ void intel_hotplug_irq_init(struct intel_display *display) display->funcs.hotplug = &gen11_hpd_funcs; else if (display->platform.geminilake || display->platform.broxton) display->funcs.hotplug = &bxt_hpd_funcs; - else if (INTEL_PCH_TYPE(i915) >= PCH_ICP) + else if (INTEL_PCH_TYPE(display) >= PCH_ICP) display->funcs.hotplug = &icp_hpd_funcs; - else if (INTEL_PCH_TYPE(i915) >= PCH_SPT) + else if (INTEL_PCH_TYPE(display) >= PCH_SPT) display->funcs.hotplug = &spt_hpd_funcs; else display->funcs.hotplug = &ilk_hpd_funcs; diff --git a/drivers/gpu/drm/i915/display/intel_lpe_audio.c b/drivers/gpu/drm/i915/display/intel_lpe_audio.c index 59551c8414c2..666148a14522 100644 --- a/drivers/gpu/drm/i915/display/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/display/intel_lpe_audio.c @@ -179,7 +179,7 @@ static int lpe_audio_irq_init(struct intel_display *display) handle_simple_irq, "hdmi_lpe_audio_irq_handler"); - return irq_set_chip_data(irq, dev_priv); + return 0; } static bool lpe_audio_detect(struct intel_display *display) diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 89d26913e253..8ce7c630da52 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -37,9 +37,9 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> #include <drm/drm_edid.h> +#include <drm/drm_print.h> #include <drm/drm_probe_helper.h> -#include "i915_drv.h" #include "i915_reg.h" #include "intel_atomic.h" #include "intel_backlight.h" @@ -87,13 +87,12 @@ static struct intel_lvds_encoder *to_lvds_encoder(struct intel_encoder *encoder) bool intel_lvds_port_enabled(struct intel_display *display, i915_reg_t lvds_reg, enum pipe *pipe) { - struct drm_i915_private *i915 = to_i915(display->drm); u32 val; val = intel_de_read(display, lvds_reg); /* 
asserts want to know the pipe even if the port is disabled */ - if (HAS_PCH_CPT(i915)) + if (HAS_PCH_CPT(display)) *pipe = REG_FIELD_GET(LVDS_PIPE_SEL_MASK_CPT, val); else *pipe = REG_FIELD_GET(LVDS_PIPE_SEL_MASK, val); @@ -243,13 +242,12 @@ static void intel_pre_enable_lvds(struct intel_atomic_state *state, { struct intel_display *display = to_intel_display(state); struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(encoder); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; enum pipe pipe = crtc->pipe; u32 temp; - if (HAS_PCH_SPLIT(i915)) { + if (HAS_PCH_SPLIT(display)) { assert_fdi_rx_pll_disabled(display, pipe); assert_shared_dpll_disabled(display, crtc_state->shared_dpll); } else { @@ -261,7 +259,7 @@ static void intel_pre_enable_lvds(struct intel_atomic_state *state, temp = lvds_encoder->init_lvds_val; temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP; - if (HAS_PCH_CPT(i915)) { + if (HAS_PCH_CPT(display)) { temp &= ~LVDS_PIPE_SEL_MASK_CPT; temp |= LVDS_PIPE_SEL_CPT(pipe); } else { @@ -421,7 +419,6 @@ static int intel_lvds_compute_config(struct intel_encoder *encoder, struct drm_connector_state *conn_state) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(encoder); struct intel_connector *connector = lvds_encoder->attached_connector; struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; @@ -435,7 +432,7 @@ static int intel_lvds_compute_config(struct intel_encoder *encoder, return -EINVAL; } - if (HAS_PCH_SPLIT(i915)) { + if (HAS_PCH_SPLIT(display)) { crtc_state->has_pch_encoder = true; if (!intel_fdi_compute_pipe_bpp(crtc_state)) return -EINVAL; @@ -798,7 +795,6 @@ bool intel_is_dual_link_lvds(struct intel_display *display) static bool compute_is_dual_link_lvds(struct intel_lvds_encoder *lvds_encoder) { struct intel_display *display = to_intel_display(&lvds_encoder->base); - struct drm_i915_private *i915 = to_i915(lvds_encoder->base.base.dev); struct intel_connector *connector = lvds_encoder->attached_connector; const struct drm_display_mode *fixed_mode = intel_panel_preferred_fixed_mode(connector); @@ -822,7 +818,7 @@ static bool compute_is_dual_link_lvds(struct intel_lvds_encoder *lvds_encoder) * register is uninitialized. 
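[Editorial note, not part of the patch] A pattern recurs throughout these hotplug and LVDS hunks: the `struct drm_i915_private *` local fetched with `to_i915(...)` goes away, and checks such as `HAS_PCH_CPT()` and `INTEL_PCH_TYPE()` take the `struct intel_display *` the function already has. The standalone sketch below models that shape with simplified stand-in types and arbitrary example values; it is an illustration, not the i915 code.

#include <stdio.h>

/* Simplified stand-ins for the i915 structures; names and values here are
 * illustrative only. */
enum example_pch { EXAMPLE_PCH_NONE = 0, EXAMPLE_PCH_IBX, EXAMPLE_PCH_CPT };

struct example_display {
	enum example_pch pch_type;
};

#define EXAMPLE_INTEL_PCH_TYPE(display)	((display)->pch_type)
#define EXAMPLE_HAS_PCH_CPT(display) \
	(EXAMPLE_INTEL_PCH_TYPE(display) == EXAMPLE_PCH_CPT)

/* As in the LVDS hunk: choose a pipe-select field based on the PCH type,
 * using only the display pointer. The masks are arbitrary example values. */
static unsigned int example_pipe_sel_mask(const struct example_display *display)
{
	return EXAMPLE_HAS_PCH_CPT(display) ? 0x60000000u : 0x40000000u;
}

int main(void)
{
	struct example_display display = { .pch_type = EXAMPLE_PCH_CPT };

	printf("pipe select mask: 0x%08x\n", example_pipe_sel_mask(&display));
	return 0;
}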
*/ val = intel_de_read(display, lvds_encoder->reg); - if (HAS_PCH_CPT(i915)) + if (HAS_PCH_CPT(display)) val &= ~(LVDS_DETECTED | LVDS_PIPE_SEL_MASK_CPT); else val &= ~(LVDS_DETECTED | LVDS_PIPE_SEL_MASK); @@ -846,7 +842,6 @@ static void intel_lvds_add_properties(struct drm_connector *connector) */ void intel_lvds_init(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_lvds_encoder *lvds_encoder; struct intel_connector *connector; const struct drm_edid *drm_edid; @@ -868,14 +863,14 @@ void intel_lvds_init(struct intel_display *display) return; } - if (HAS_PCH_SPLIT(i915)) + if (HAS_PCH_SPLIT(display)) lvds_reg = PCH_LVDS; else lvds_reg = LVDS; lvds = intel_de_read(display, lvds_reg); - if (HAS_PCH_SPLIT(i915)) { + if (HAS_PCH_SPLIT(display)) { if ((lvds & LVDS_DETECTED) == 0) return; } @@ -915,7 +910,7 @@ void intel_lvds_init(struct intel_display *display) encoder->enable = intel_enable_lvds; encoder->pre_enable = intel_pre_enable_lvds; encoder->compute_config = intel_lvds_compute_config; - if (HAS_PCH_SPLIT(i915)) { + if (HAS_PCH_SPLIT(display)) { encoder->disable = pch_disable_lvds; encoder->post_disable = pch_post_disable_lvds; } else { diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.c b/drivers/gpu/drm/i915/display/intel_modeset_setup.c index 9e963bce340f..0325b0c9506d 100644 --- a/drivers/gpu/drm/i915/display/intel_modeset_setup.c +++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.c @@ -6,11 +6,11 @@ * state. */ -#include <drm/drm_atomic_uapi.h> #include <drm/drm_atomic_state_helper.h> +#include <drm/drm_atomic_uapi.h> +#include <drm/drm_print.h> #include <drm/drm_vblank.h> -#include "i915_drv.h" #include "i915_reg.h" #include "i9xx_wm.h" #include "intel_atomic.h" @@ -31,13 +31,14 @@ #include "intel_pmdemand.h" #include "intel_tc.h" #include "intel_vblank.h" +#include "intel_vga.h" #include "intel_wm.h" #include "skl_watermark.h" static void intel_crtc_disable_noatomic_begin(struct intel_crtc *crtc, struct drm_modeset_acquire_ctx *ctx) { - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc); struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct intel_plane *plane; @@ -48,7 +49,7 @@ static void intel_crtc_disable_noatomic_begin(struct intel_crtc *crtc, if (!crtc_state->hw.active) return; - for_each_intel_plane_on_crtc(&i915->drm, crtc, plane) { + for_each_intel_plane_on_crtc(display->drm, crtc, plane) { const struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); @@ -56,9 +57,9 @@ static void intel_crtc_disable_noatomic_begin(struct intel_crtc *crtc, intel_plane_disable_noatomic(crtc, plane); } - state = drm_atomic_state_alloc(&i915->drm); + state = drm_atomic_state_alloc(display->drm); if (!state) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "failed to disable [CRTC:%d:%s], out of memory", crtc->base.base.id, crtc->base.name); return; @@ -68,7 +69,7 @@ static void intel_crtc_disable_noatomic_begin(struct intel_crtc *crtc, to_intel_atomic_state(state)->internal = true; /* Everything's already locked, -EDEADLK can't happen. 
*/ - for_each_intel_crtc_in_pipe_mask(&i915->drm, temp_crtc, + for_each_intel_crtc_in_pipe_mask(display->drm, temp_crtc, BIT(pipe) | intel_crtc_joiner_secondary_pipes(crtc_state)) { struct intel_crtc_state *temp_crtc_state = @@ -77,14 +78,14 @@ static void intel_crtc_disable_noatomic_begin(struct intel_crtc *crtc, ret = drm_atomic_add_affected_connectors(state, &temp_crtc->base); - drm_WARN_ON(&i915->drm, IS_ERR(temp_crtc_state) || ret); + drm_WARN_ON(display->drm, IS_ERR(temp_crtc_state) || ret); } - i915->display.funcs.display->crtc_disable(to_intel_atomic_state(state), crtc); + display->funcs.display->crtc_disable(to_intel_atomic_state(state), crtc); drm_atomic_state_put(state); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[CRTC:%d:%s] hw state adjusted, was enabled, now disabled\n", crtc->base.base.id, crtc->base.name); @@ -118,13 +119,12 @@ static void set_encoder_for_connector(struct intel_connector *connector, static void reset_encoder_connector_state(struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); struct intel_pmdemand_state *pmdemand_state = - to_intel_pmdemand_state(i915->display.pmdemand.obj.state); + to_intel_pmdemand_state(display->pmdemand.obj.state); struct intel_connector *connector; struct drm_connector_list_iter conn_iter; - drm_connector_list_iter_begin(&i915->drm, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { if (connector->base.encoder != &encoder->base) continue; @@ -143,10 +143,10 @@ static void reset_encoder_connector_state(struct intel_encoder *encoder) static void reset_crtc_encoder_state(struct intel_crtc *crtc) { - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc); struct intel_encoder *encoder; - for_each_encoder_on_crtc(&i915->drm, &crtc->base, encoder) { + for_each_encoder_on_crtc(display->drm, &crtc->base, encoder) { reset_encoder_connector_state(encoder); encoder->base.crtc = NULL; } @@ -183,13 +183,13 @@ static void intel_crtc_disable_noatomic_complete(struct intel_crtc *crtc) * Return all the pipes using a transcoder in @transcoder_mask. * For joiner configs return only the joiner primary. 
*/ -static u8 get_transcoder_pipes(struct drm_i915_private *i915, +static u8 get_transcoder_pipes(struct intel_display *display, u8 transcoder_mask) { struct intel_crtc *temp_crtc; u8 pipes = 0; - for_each_intel_crtc(&i915->drm, temp_crtc) { + for_each_intel_crtc(display->drm, temp_crtc) { struct intel_crtc_state *temp_crtc_state = to_intel_crtc_state(temp_crtc->base.state); @@ -214,7 +214,6 @@ static void get_portsync_pipes(struct intel_crtc *crtc, u8 *master_pipe_mask, u8 *slave_pipes_mask) { struct intel_display *display = to_intel_display(crtc); - struct drm_i915_private *i915 = to_i915(crtc->base.dev); struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct intel_crtc *master_crtc; @@ -233,20 +232,20 @@ static void get_portsync_pipes(struct intel_crtc *crtc, else master_transcoder = crtc_state->master_transcoder; - *master_pipe_mask = get_transcoder_pipes(i915, BIT(master_transcoder)); - drm_WARN_ON(&i915->drm, !is_power_of_2(*master_pipe_mask)); + *master_pipe_mask = get_transcoder_pipes(display, BIT(master_transcoder)); + drm_WARN_ON(display->drm, !is_power_of_2(*master_pipe_mask)); master_crtc = intel_crtc_for_pipe(display, ffs(*master_pipe_mask) - 1); master_crtc_state = to_intel_crtc_state(master_crtc->base.state); - *slave_pipes_mask = get_transcoder_pipes(i915, master_crtc_state->sync_mode_slaves_mask); + *slave_pipes_mask = get_transcoder_pipes(display, master_crtc_state->sync_mode_slaves_mask); } -static u8 get_joiner_secondary_pipes(struct drm_i915_private *i915, u8 primary_pipes_mask) +static u8 get_joiner_secondary_pipes(struct intel_display *display, u8 primary_pipes_mask) { struct intel_crtc *primary_crtc; u8 pipes = 0; - for_each_intel_crtc_in_pipe_mask(&i915->drm, primary_crtc, primary_pipes_mask) { + for_each_intel_crtc_in_pipe_mask(display->drm, primary_crtc, primary_pipes_mask) { struct intel_crtc_state *primary_crtc_state = to_intel_crtc_state(primary_crtc->base.state); @@ -259,45 +258,45 @@ static u8 get_joiner_secondary_pipes(struct drm_i915_private *i915, u8 primary_p static void intel_crtc_disable_noatomic(struct intel_crtc *crtc, struct drm_modeset_acquire_ctx *ctx) { - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc); + struct intel_crtc *temp_crtc; u8 portsync_master_mask; u8 portsync_slaves_mask; u8 joiner_secondaries_mask; - struct intel_crtc *temp_crtc; /* TODO: Add support for MST */ get_portsync_pipes(crtc, &portsync_master_mask, &portsync_slaves_mask); - joiner_secondaries_mask = get_joiner_secondary_pipes(i915, + joiner_secondaries_mask = get_joiner_secondary_pipes(display, portsync_master_mask | portsync_slaves_mask); - drm_WARN_ON(&i915->drm, + drm_WARN_ON(display->drm, portsync_master_mask & portsync_slaves_mask || portsync_master_mask & joiner_secondaries_mask || portsync_slaves_mask & joiner_secondaries_mask); - for_each_intel_crtc_in_pipe_mask(&i915->drm, temp_crtc, joiner_secondaries_mask) + for_each_intel_crtc_in_pipe_mask(display->drm, temp_crtc, joiner_secondaries_mask) intel_crtc_disable_noatomic_begin(temp_crtc, ctx); - for_each_intel_crtc_in_pipe_mask(&i915->drm, temp_crtc, portsync_slaves_mask) + for_each_intel_crtc_in_pipe_mask(display->drm, temp_crtc, portsync_slaves_mask) intel_crtc_disable_noatomic_begin(temp_crtc, ctx); - for_each_intel_crtc_in_pipe_mask(&i915->drm, temp_crtc, portsync_master_mask) + for_each_intel_crtc_in_pipe_mask(display->drm, temp_crtc, portsync_master_mask) intel_crtc_disable_noatomic_begin(temp_crtc, ctx); - 
for_each_intel_crtc_in_pipe_mask(&i915->drm, temp_crtc, + for_each_intel_crtc_in_pipe_mask(display->drm, temp_crtc, joiner_secondaries_mask | portsync_slaves_mask | portsync_master_mask) intel_crtc_disable_noatomic_complete(temp_crtc); } -static void intel_modeset_update_connector_atomic_state(struct drm_i915_private *i915) +static void intel_modeset_update_connector_atomic_state(struct intel_display *display) { struct intel_connector *connector; struct drm_connector_list_iter conn_iter; - drm_connector_list_iter_begin(&i915->drm, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { struct drm_connector_state *conn_state = connector->base.state; struct intel_encoder *encoder = @@ -319,7 +318,7 @@ static void intel_modeset_update_connector_atomic_state(struct drm_i915_private static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + struct intel_display *display = to_intel_display(crtc_state); if (intel_crtc_is_joiner_secondary(crtc_state)) return; @@ -332,7 +331,7 @@ static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state crtc_state->uapi.adjusted_mode = crtc_state->hw.adjusted_mode; crtc_state->uapi.scaling_filter = crtc_state->hw.scaling_filter; - if (DISPLAY_INFO(i915)->color.degamma_lut_size) { + if (DISPLAY_INFO(display)->color.degamma_lut_size) { /* assume 1:1 mapping */ drm_property_replace_blob(&crtc_state->hw.degamma_lut, crtc_state->pre_csc_lut); @@ -347,7 +346,7 @@ static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state * to gamma_lut as that is the only valid source of LUTs * in the uapi. */ - drm_WARN_ON(&i915->drm, crtc_state->post_csc_lut && + drm_WARN_ON(display->drm, crtc_state->post_csc_lut && crtc_state->pre_csc_lut); drm_property_replace_blob(&crtc_state->hw.degamma_lut, @@ -366,15 +365,14 @@ static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state } static void -intel_sanitize_plane_mapping(struct drm_i915_private *i915) +intel_sanitize_plane_mapping(struct intel_display *display) { - struct intel_display *display = &i915->display; struct intel_crtc *crtc; - if (DISPLAY_VER(i915) >= 4) + if (DISPLAY_VER(display) >= 4) return; - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_plane *plane = to_intel_plane(crtc->base.primary); struct intel_crtc *plane_crtc; @@ -386,7 +384,7 @@ intel_sanitize_plane_mapping(struct drm_i915_private *i915) if (pipe == crtc->pipe) continue; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[PLANE:%d:%s] attached to the wrong pipe, disabling plane\n", plane->base.base.id, plane->base.name); @@ -423,12 +421,12 @@ static bool intel_crtc_needs_link_reset(struct intel_crtc *crtc) static struct intel_connector *intel_encoder_find_connector(struct intel_encoder *encoder) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); struct drm_connector_list_iter conn_iter; struct intel_connector *connector; struct intel_connector *found_connector = NULL; - drm_connector_list_iter_begin(&i915->drm, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { if (&encoder->base == connector->base.encoder) { found_connector = connector; @@ -466,7 +464,7 @@ static void intel_sanitize_fifo_underrun_reporting(const struct 
intel_crtc_state static bool intel_sanitize_crtc(struct intel_crtc *crtc, struct drm_modeset_acquire_ctx *ctx) { - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc); struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); bool needs_link_reset; @@ -474,7 +472,7 @@ static bool intel_sanitize_crtc(struct intel_crtc *crtc, struct intel_plane *plane; /* Disable everything but the primary plane */ - for_each_intel_plane_on_crtc(&i915->drm, crtc, plane) { + for_each_intel_plane_on_crtc(display->drm, crtc, plane) { const struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); @@ -515,7 +513,7 @@ static bool intel_sanitize_crtc(struct intel_crtc *crtc, return true; } -static void intel_sanitize_all_crtcs(struct drm_i915_private *i915, +static void intel_sanitize_all_crtcs(struct intel_display *display, struct drm_modeset_acquire_ctx *ctx) { struct intel_crtc *crtc; @@ -530,7 +528,7 @@ static void intel_sanitize_all_crtcs(struct drm_i915_private *i915, for (;;) { u32 old_mask = crtcs_forced_off; - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { u32 crtc_mask = drm_crtc_mask(&crtc->base); if (crtcs_forced_off & crtc_mask) @@ -543,7 +541,7 @@ static void intel_sanitize_all_crtcs(struct drm_i915_private *i915, break; } - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); @@ -553,7 +551,7 @@ static void intel_sanitize_all_crtcs(struct drm_i915_private *i915, static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + struct intel_display *display = to_intel_display(crtc_state); /* * Some SNB BIOSen (eg. ASUS K53SV) are known to misprogram @@ -565,7 +563,7 @@ static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state) * without several WARNs, but for now let's take the easy * road. */ - return IS_SANDYBRIDGE(i915) && + return display->platform.sandybridge && crtc_state->hw.active && crtc_state->shared_dpll && crtc_state->port_clock == 0; @@ -574,13 +572,12 @@ static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state) static void intel_sanitize_encoder(struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); struct intel_connector *connector; struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); struct intel_crtc_state *crtc_state = crtc ? to_intel_crtc_state(crtc->base.state) : NULL; struct intel_pmdemand_state *pmdemand_state = - to_intel_pmdemand_state(i915->display.pmdemand.obj.state); + to_intel_pmdemand_state(display->pmdemand.obj.state); /* * We need to check both for a crtc link (meaning that the encoder is @@ -591,7 +588,7 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) crtc_state->hw.active; if (crtc_state && has_bogus_dpll_config(crtc_state)) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "BIOS has misprogrammed the hardware. 
Disabling pipe %c\n", pipe_name(crtc->pipe)); has_active_crtc = false; @@ -599,7 +596,7 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) connector = intel_encoder_find_connector(encoder); if (connector && !has_active_crtc) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[ENCODER:%d:%s] has active connectors but no active pipe!\n", encoder->base.base.id, encoder->base.name); @@ -616,7 +613,7 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) if (crtc_state) { struct drm_encoder *best_encoder; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[ENCODER:%d:%s] manually disabled\n", encoder->base.base.id, encoder->base.name); @@ -650,18 +647,17 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) /* notify opregion of the sanitized encoder state */ intel_opregion_notify_encoder(encoder, connector && has_active_crtc); - if (HAS_DDI(i915)) + if (HAS_DDI(display)) intel_ddi_sanitize_encoder_pll_mapping(encoder); } /* FIXME read out full plane state for all planes */ -static void readout_plane_state(struct drm_i915_private *i915) +static void readout_plane_state(struct intel_display *display) { - struct intel_display *display = &i915->display; struct intel_plane *plane; struct intel_crtc *crtc; - for_each_intel_plane(&i915->drm, plane) { + for_each_intel_plane(display->drm, plane) { struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); struct intel_crtc_state *crtc_state; @@ -675,13 +671,13 @@ static void readout_plane_state(struct drm_i915_private *i915) intel_set_plane_visible(crtc_state, plane_state, visible); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[PLANE:%d:%s] hw state readout: %s, pipe %c\n", plane->base.base.id, plane->base.name, str_enabled_disabled(visible), pipe_name(pipe)); } - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); @@ -689,18 +685,17 @@ static void readout_plane_state(struct drm_i915_private *i915) } } -static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) +static void intel_modeset_readout_hw_state(struct intel_display *display) { - struct intel_display *display = &i915->display; struct intel_pmdemand_state *pmdemand_state = - to_intel_pmdemand_state(i915->display.pmdemand.obj.state); + to_intel_pmdemand_state(display->pmdemand.obj.state); enum pipe pipe; struct intel_crtc *crtc; struct intel_encoder *encoder; struct intel_connector *connector; struct drm_connector_list_iter conn_iter; - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); @@ -715,15 +710,15 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) crtc->base.enabled = crtc_state->hw.enable; crtc->active = crtc_state->hw.active; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[CRTC:%d:%s] hw state readout: %s\n", crtc->base.base.id, crtc->base.name, str_enabled_disabled(crtc_state->hw.active)); } - readout_plane_state(i915); + readout_plane_state(display); - for_each_intel_encoder(&i915->drm, encoder) { + for_each_intel_encoder(display->drm, encoder) { struct intel_crtc_state *crtc_state = NULL; pipe = 0; @@ -742,7 +737,7 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) /* encoder should read be linked to joiner primary */ WARN_ON(intel_crtc_is_joiner_secondary(crtc_state)); - 
for_each_intel_crtc_in_pipe_mask(&i915->drm, secondary_crtc, + for_each_intel_crtc_in_pipe_mask(display->drm, secondary_crtc, intel_crtc_joiner_secondary_pipes(crtc_state)) { struct intel_crtc_state *secondary_crtc_state; @@ -765,7 +760,7 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) if (encoder->sync_state) encoder->sync_state(encoder, crtc_state); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[ENCODER:%d:%s] hw state readout: %s, pipe %c\n", encoder->base.base.id, encoder->base.name, str_enabled_disabled(encoder->base.crtc), @@ -774,7 +769,7 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) intel_dpll_readout_hw_state(display); - drm_connector_list_iter_begin(&i915->drm, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { struct intel_crtc_state *crtc_state = NULL; @@ -808,14 +803,14 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) if (connector->sync_state) connector->sync_state(connector, crtc_state); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s] hw state readout: %s\n", connector->base.base.id, connector->base.name, str_enabled_disabled(connector->base.encoder)); } drm_connector_list_iter_end(&conn_iter); - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct intel_plane *plane; @@ -838,7 +833,7 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) intel_crtc_copy_hw_to_uapi_state(crtc_state); } - for_each_intel_plane_on_crtc(&i915->drm, crtc, plane) { + for_each_intel_plane_on_crtc(display->drm, crtc, plane) { const struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); @@ -854,14 +849,14 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) * use plane->min_cdclk() :( */ if (plane_state->uapi.visible && plane->min_cdclk) { - if (crtc_state->double_wide || DISPLAY_VER(i915) >= 10) + if (crtc_state->double_wide || DISPLAY_VER(display) >= 10) crtc_state->min_cdclk[plane->id] = DIV_ROUND_UP(crtc_state->pixel_rate, 2); else crtc_state->min_cdclk[plane->id] = crtc_state->pixel_rate; } - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "[PLANE:%d:%s] min_cdclk %d kHz\n", plane->base.base.id, plane->base.name, crtc_state->min_cdclk[plane->id]); @@ -882,11 +877,11 @@ static void intel_modeset_readout_hw_state(struct drm_i915_private *i915) } static void -get_encoder_power_domains(struct drm_i915_private *i915) +get_encoder_power_domains(struct intel_display *display) { struct intel_encoder *encoder; - for_each_intel_encoder(&i915->drm, encoder) { + for_each_intel_encoder(display->drm, encoder) { struct intel_crtc_state *crtc_state; if (!encoder->get_power_domains) @@ -904,47 +899,49 @@ get_encoder_power_domains(struct drm_i915_private *i915) } } -static void intel_early_display_was(struct drm_i915_private *i915) +static void intel_early_display_was(struct intel_display *display) { /* * Display WA #1185 WaDisableDARBFClkGating:glk,icl,ehl,tgl * Also known as Wa_14010480278. */ - if (IS_DISPLAY_VER(i915, 10, 12)) - intel_de_rmw(i915, GEN9_CLKGATE_DIS_0, 0, DARBF_GATING_DIS); + if (IS_DISPLAY_VER(display, 10, 12)) + intel_de_rmw(display, GEN9_CLKGATE_DIS_0, 0, DARBF_GATING_DIS); /* * WaRsPkgCStateDisplayPMReq:hsw * System hang if this isn't done before disabling all planes! 
*/ - if (IS_HASWELL(i915)) - intel_de_rmw(i915, CHICKEN_PAR1_1, 0, FORCE_ARB_IDLE_PLANES); + if (display->platform.haswell) + intel_de_rmw(display, CHICKEN_PAR1_1, 0, FORCE_ARB_IDLE_PLANES); - if (IS_KABYLAKE(i915) || IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) { + if (display->platform.kabylake || display->platform.coffeelake || + display->platform.cometlake) { /* Display WA #1142:kbl,cfl,cml */ - intel_de_rmw(i915, CHICKEN_PAR1_1, + intel_de_rmw(display, CHICKEN_PAR1_1, KBL_ARB_FILL_SPARE_22, KBL_ARB_FILL_SPARE_22); - intel_de_rmw(i915, CHICKEN_MISC_2, + intel_de_rmw(display, CHICKEN_MISC_2, KBL_ARB_FILL_SPARE_13 | KBL_ARB_FILL_SPARE_14, KBL_ARB_FILL_SPARE_14); } } -void intel_modeset_setup_hw_state(struct drm_i915_private *i915, +void intel_modeset_setup_hw_state(struct intel_display *display, struct drm_modeset_acquire_ctx *ctx) { - struct intel_display *display = &i915->display; struct intel_encoder *encoder; struct intel_crtc *crtc; intel_wakeref_t wakeref; wakeref = intel_display_power_get(display, POWER_DOMAIN_INIT); - intel_early_display_was(i915); - intel_modeset_readout_hw_state(i915); + intel_early_display_was(display); + intel_vga_disable(display); + + intel_modeset_readout_hw_state(display); /* HW state is read out, now we need to sanitize this mess. */ - get_encoder_power_domains(i915); + get_encoder_power_domains(display); intel_pch_sanitize(display); @@ -954,7 +951,7 @@ void intel_modeset_setup_hw_state(struct drm_i915_private *i915, * intel_sanitize_plane_mapping() may need to do vblank * waits, so we need vblank interrupts restored beforehand. */ - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); @@ -968,20 +965,20 @@ void intel_modeset_setup_hw_state(struct drm_i915_private *i915, } } - intel_fbc_sanitize(&i915->display); + intel_fbc_sanitize(display); - intel_sanitize_plane_mapping(i915); + intel_sanitize_plane_mapping(display); - for_each_intel_encoder(&i915->drm, encoder) + for_each_intel_encoder(display->drm, encoder) intel_sanitize_encoder(encoder); /* * Sanitizing CRTCs needs their connector atomic state to be * up-to-date, so ensure that already here. 
*/ - intel_modeset_update_connector_atomic_state(i915); + intel_modeset_update_connector_atomic_state(display); - intel_sanitize_all_crtcs(i915, ctx); + intel_sanitize_all_crtcs(display, ctx); intel_dpll_sanitize_state(display); @@ -990,13 +987,13 @@ void intel_modeset_setup_hw_state(struct drm_i915_private *i915, intel_wm_get_hw_state(display); intel_wm_sanitize(display); - for_each_intel_crtc(&i915->drm, crtc) { + for_each_intel_crtc(display->drm, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct intel_power_domain_mask put_domains; intel_modeset_get_crtc_power_domains(crtc_state, &put_domains); - if (drm_WARN_ON(&i915->drm, !bitmap_empty(put_domains.bits, POWER_DOMAIN_NUM))) + if (drm_WARN_ON(display->drm, !bitmap_empty(put_domains.bits, POWER_DOMAIN_NUM))) intel_modeset_put_crtc_power_domains(crtc, &put_domains); } diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.h b/drivers/gpu/drm/i915/display/intel_modeset_setup.h index 3beff67b33d0..f5e6f3ae9572 100644 --- a/drivers/gpu/drm/i915/display/intel_modeset_setup.h +++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.h @@ -6,10 +6,10 @@ #ifndef __INTEL_MODESET_SETUP_H__ #define __INTEL_MODESET_SETUP_H__ -struct drm_i915_private; struct drm_modeset_acquire_ctx; +struct intel_display; -void intel_modeset_setup_hw_state(struct drm_i915_private *i915, +void intel_modeset_setup_hw_state(struct intel_display *display, struct drm_modeset_acquire_ctx *ctx); #endif /* __INTEL_MODESET_SETUP_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_modeset_verify.c b/drivers/gpu/drm/i915/display/intel_modeset_verify.c index a008412fdd04..766a9983665a 100644 --- a/drivers/gpu/drm/i915/display/intel_modeset_verify.c +++ b/drivers/gpu/drm/i915/display/intel_modeset_verify.c @@ -6,13 +6,14 @@ */ #include <drm/drm_atomic_state_helper.h> +#include <drm/drm_print.h> -#include "i915_drv.h" #include "intel_atomic.h" #include "intel_crtc.h" #include "intel_crtc_state_dump.h" #include "intel_cx0_phy.h" #include "intel_display.h" +#include "intel_display_core.h" #include "intel_display_types.h" #include "intel_fdi.h" #include "intel_modeset_verify.h" @@ -28,9 +29,8 @@ static void intel_connector_verify_state(const struct intel_crtc_state *crtc_sta { struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_display *display = to_intel_display(connector); - struct drm_i915_private *i915 = to_i915(connector->base.dev); - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s]\n", + drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s]\n", connector->base.base.id, connector->base.name); if (connector->get_hw_state(connector)) { @@ -91,7 +91,6 @@ verify_connector_state(struct intel_atomic_state *state, static void intel_pipe_config_sanity_check(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); if (crtc_state->has_pch_encoder) { int fdi_dotclock = intel_dotclock_calculate(intel_fdi_link_freq(display, crtc_state), @@ -103,7 +102,7 @@ static void intel_pipe_config_sanity_check(const struct intel_crtc_state *crtc_s * Yell if the encoder disagrees. Allow for slight * rounding differences. 
*/ - drm_WARN(&i915->drm, abs(fdi_dotclock - dotclock) > 1, + drm_WARN(display->drm, abs(fdi_dotclock - dotclock) > 1, "FDI dotclock and encoder dotclock mismatch, fdi: %i, encoder: %i\n", fdi_dotclock, dotclock); } @@ -113,17 +112,16 @@ static void verify_encoder_state(struct intel_atomic_state *state) { struct intel_display *display = to_intel_display(state); - struct drm_i915_private *i915 = to_i915(state->base.dev); struct intel_encoder *encoder; struct drm_connector *connector; const struct drm_connector_state *old_conn_state, *new_conn_state; int i; - for_each_intel_encoder(&i915->drm, encoder) { + for_each_intel_encoder(display->drm, encoder) { bool enabled = false, found = false; enum pipe pipe; - drm_dbg_kms(&i915->drm, "[ENCODER:%d:%s]\n", + drm_dbg_kms(display->drm, "[ENCODER:%d:%s]\n", encoder->base.base.id, encoder->base.name); @@ -166,7 +164,6 @@ verify_crtc_state(struct intel_atomic_state *state, struct intel_crtc *crtc) { struct intel_display *display = to_intel_display(state); - struct drm_i915_private *i915 = to_i915(display->drm); const struct intel_crtc_state *sw_crtc_state = intel_atomic_get_new_crtc_state(state, crtc); struct intel_crtc_state *hw_crtc_state; @@ -185,7 +182,7 @@ verify_crtc_state(struct intel_atomic_state *state, intel_crtc_get_pipe_config(hw_crtc_state); /* we keep both pipes enabled on 830 */ - if (IS_I830(i915) && hw_crtc_state->hw.active) + if (display->platform.i830 && hw_crtc_state->hw.active) hw_crtc_state->hw.active = sw_crtc_state->hw.active; INTEL_DISPLAY_STATE_WARN(display, diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index aff9a3455c1b..12308495afa5 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -291,7 +291,6 @@ static void intel_overlay_flip_prepare(struct intel_overlay *overlay, struct i915_vma *vma) { struct intel_display *display = overlay->display; - struct drm_i915_private *i915 = to_i915(display->drm); enum pipe pipe = overlay->crtc->pipe; struct intel_frontbuffer *frontbuffer = NULL; @@ -307,7 +306,7 @@ static void intel_overlay_flip_prepare(struct intel_overlay *overlay, intel_frontbuffer_put(overlay->frontbuffer); overlay->frontbuffer = frontbuffer; - intel_frontbuffer_flip_prepare(i915, INTEL_FRONTBUFFER_OVERLAY(pipe)); + intel_frontbuffer_flip_prepare(display, INTEL_FRONTBUFFER_OVERLAY(pipe)); overlay->old_vma = overlay->vma; if (vma) @@ -359,14 +358,13 @@ static int intel_overlay_continue(struct intel_overlay *overlay, static void intel_overlay_release_old_vma(struct intel_overlay *overlay) { struct intel_display *display = overlay->display; - struct drm_i915_private *i915 = to_i915(display->drm); struct i915_vma *vma; vma = fetch_and_zero(&overlay->old_vma); if (drm_WARN_ON(display->drm, !vma)) return; - intel_frontbuffer_flip_complete(i915, INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); + intel_frontbuffer_flip_complete(display, INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); i915_vma_unpin(vma); i915_vma_put(vma); diff --git a/drivers/gpu/drm/i915/display/intel_pch.c b/drivers/gpu/drm/i915/display/intel_pch.c new file mode 100644 index 000000000000..469e8a3cfb49 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_pch.c @@ -0,0 +1,340 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Intel Corporation. 
+ */ + +#include <drm/drm_print.h> + +#include "i915_utils.h" +#include "intel_display_core.h" +#include "intel_pch.h" + +#define INTEL_PCH_DEVICE_ID_MASK 0xff80 +#define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00 +#define INTEL_PCH_CPT_DEVICE_ID_TYPE 0x1c00 +#define INTEL_PCH_PPT_DEVICE_ID_TYPE 0x1e00 +#define INTEL_PCH_LPT_DEVICE_ID_TYPE 0x8c00 +#define INTEL_PCH_LPT_LP_DEVICE_ID_TYPE 0x9c00 +#define INTEL_PCH_WPT_DEVICE_ID_TYPE 0x8c80 +#define INTEL_PCH_WPT_LP_DEVICE_ID_TYPE 0x9c80 +#define INTEL_PCH_SPT_DEVICE_ID_TYPE 0xA100 +#define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE 0x9D00 +#define INTEL_PCH_KBP_DEVICE_ID_TYPE 0xA280 +#define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300 +#define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80 +#define INTEL_PCH_CMP_DEVICE_ID_TYPE 0x0280 +#define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680 +#define INTEL_PCH_CMP_V_DEVICE_ID_TYPE 0xA380 +#define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480 +#define INTEL_PCH_ICP2_DEVICE_ID_TYPE 0x3880 +#define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00 +#define INTEL_PCH_TGP_DEVICE_ID_TYPE 0xA080 +#define INTEL_PCH_TGP2_DEVICE_ID_TYPE 0x4380 +#define INTEL_PCH_JSP_DEVICE_ID_TYPE 0x4D80 +#define INTEL_PCH_ADP_DEVICE_ID_TYPE 0x7A80 +#define INTEL_PCH_ADP2_DEVICE_ID_TYPE 0x5180 +#define INTEL_PCH_ADP3_DEVICE_ID_TYPE 0x7A00 +#define INTEL_PCH_ADP4_DEVICE_ID_TYPE 0x5480 +#define INTEL_PCH_P2X_DEVICE_ID_TYPE 0x7100 +#define INTEL_PCH_P3X_DEVICE_ID_TYPE 0x7000 +#define INTEL_PCH_QEMU_DEVICE_ID_TYPE 0x2900 /* qemu q35 has 2918 */ + +/* + * Check for platforms where the south display is on the same PCI device or SoC + * die as the north display. The PCH (if it even exists) is not involved in + * display. Return a fake PCH type for south display handling on these + * platforms, without actually detecting the PCH, and PCH_NONE otherwise. + */ +static enum intel_pch intel_pch_fake_for_south_display(struct intel_display *display) +{ + enum intel_pch pch_type = PCH_NONE; + + if (DISPLAY_VER(display) >= 20) + pch_type = PCH_LNL; + else if (display->platform.battlemage || display->platform.meteorlake) + pch_type = PCH_MTL; + else if (display->platform.dg2) + pch_type = PCH_DG2; + else if (display->platform.dg1) + pch_type = PCH_DG1; + + return pch_type; +} + +/* Map PCH device id to PCH type, or PCH_NONE if unknown. 
*/ +static enum intel_pch +intel_pch_type(const struct intel_display *display, unsigned short id) +{ + switch (id) { + case INTEL_PCH_IBX_DEVICE_ID_TYPE: + drm_dbg_kms(display->drm, "Found Ibex Peak PCH\n"); + drm_WARN_ON(display->drm, DISPLAY_VER(display) != 5); + return PCH_IBX; + case INTEL_PCH_CPT_DEVICE_ID_TYPE: + drm_dbg_kms(display->drm, "Found CougarPoint PCH\n"); + drm_WARN_ON(display->drm, + DISPLAY_VER(display) != 6 && + !display->platform.ivybridge); + return PCH_CPT; + case INTEL_PCH_PPT_DEVICE_ID_TYPE: + drm_dbg_kms(display->drm, "Found PantherPoint PCH\n"); + drm_WARN_ON(display->drm, + DISPLAY_VER(display) != 6 && + !display->platform.ivybridge); + /* PPT is CPT compatible */ + return PCH_CPT; + case INTEL_PCH_LPT_DEVICE_ID_TYPE: + drm_dbg_kms(display->drm, "Found LynxPoint PCH\n"); + drm_WARN_ON(display->drm, + !display->platform.haswell && + !display->platform.broadwell); + drm_WARN_ON(display->drm, + display->platform.haswell_ult || + display->platform.broadwell_ult); + return PCH_LPT_H; + case INTEL_PCH_LPT_LP_DEVICE_ID_TYPE: + drm_dbg_kms(display->drm, "Found LynxPoint LP PCH\n"); + drm_WARN_ON(display->drm, + !display->platform.haswell && + !display->platform.broadwell); + drm_WARN_ON(display->drm, + !display->platform.haswell_ult && + !display->platform.broadwell_ult); + return PCH_LPT_LP; + case INTEL_PCH_WPT_DEVICE_ID_TYPE: + drm_dbg_kms(display->drm, "Found WildcatPoint PCH\n"); + drm_WARN_ON(display->drm, + !display->platform.haswell && + !display->platform.broadwell); + drm_WARN_ON(display->drm, + display->platform.haswell_ult || + display->platform.broadwell_ult); + /* WPT is LPT compatible */ + return PCH_LPT_H; + case INTEL_PCH_WPT_LP_DEVICE_ID_TYPE: + drm_dbg_kms(display->drm, "Found WildcatPoint LP PCH\n"); + drm_WARN_ON(display->drm, + !display->platform.haswell && + !display->platform.broadwell); + drm_WARN_ON(display->drm, + !display->platform.haswell_ult && + !display->platform.broadwell_ult); + /* WPT is LPT compatible */ + return PCH_LPT_LP; + case INTEL_PCH_SPT_DEVICE_ID_TYPE: + drm_dbg_kms(display->drm, "Found SunrisePoint PCH\n"); + drm_WARN_ON(display->drm, + !display->platform.skylake && + !display->platform.kabylake && + !display->platform.coffeelake); + return PCH_SPT; + case INTEL_PCH_SPT_LP_DEVICE_ID_TYPE: + drm_dbg_kms(display->drm, "Found SunrisePoint LP PCH\n"); + drm_WARN_ON(display->drm, + !display->platform.skylake && + !display->platform.kabylake && + !display->platform.coffeelake && + !display->platform.cometlake); + return PCH_SPT; + case INTEL_PCH_KBP_DEVICE_ID_TYPE: + drm_dbg_kms(display->drm, "Found Kaby Lake PCH (KBP)\n"); + drm_WARN_ON(display->drm, + !display->platform.skylake && + !display->platform.kabylake && + !display->platform.coffeelake && + !display->platform.cometlake); + /* KBP is SPT compatible */ + return PCH_SPT; + case INTEL_PCH_CNP_DEVICE_ID_TYPE: + drm_dbg_kms(display->drm, "Found Cannon Lake PCH (CNP)\n"); + drm_WARN_ON(display->drm, + !display->platform.coffeelake && + !display->platform.cometlake); + return PCH_CNP; + case INTEL_PCH_CNP_LP_DEVICE_ID_TYPE: + drm_dbg_kms(display->drm, + "Found Cannon Lake LP PCH (CNP-LP)\n"); + drm_WARN_ON(display->drm, + !display->platform.coffeelake && + !display->platform.cometlake); + return PCH_CNP; + case INTEL_PCH_CMP_DEVICE_ID_TYPE: + case INTEL_PCH_CMP2_DEVICE_ID_TYPE: + drm_dbg_kms(display->drm, "Found Comet Lake PCH (CMP)\n"); + drm_WARN_ON(display->drm, + !display->platform.coffeelake && + !display->platform.cometlake && + !display->platform.rocketlake); + /* CMP is 
CNP compatible */ + return PCH_CNP; + case INTEL_PCH_CMP_V_DEVICE_ID_TYPE: + drm_dbg_kms(display->drm, "Found Comet Lake V PCH (CMP-V)\n"); + drm_WARN_ON(display->drm, + !display->platform.coffeelake && + !display->platform.cometlake); + /* CMP-V is based on KBP, which is SPT compatible */ + return PCH_SPT; + case INTEL_PCH_ICP_DEVICE_ID_TYPE: + case INTEL_PCH_ICP2_DEVICE_ID_TYPE: + drm_dbg_kms(display->drm, "Found Ice Lake PCH\n"); + drm_WARN_ON(display->drm, !display->platform.icelake); + return PCH_ICP; + case INTEL_PCH_MCC_DEVICE_ID_TYPE: + drm_dbg_kms(display->drm, "Found Mule Creek Canyon PCH\n"); + drm_WARN_ON(display->drm, !(display->platform.jasperlake || + display->platform.elkhartlake)); + /* MCC is TGP compatible */ + return PCH_TGP; + case INTEL_PCH_TGP_DEVICE_ID_TYPE: + case INTEL_PCH_TGP2_DEVICE_ID_TYPE: + drm_dbg_kms(display->drm, "Found Tiger Lake LP PCH\n"); + drm_WARN_ON(display->drm, !display->platform.tigerlake && + !display->platform.rocketlake && + !display->platform.skylake && + !display->platform.kabylake && + !display->platform.coffeelake && + !display->platform.cometlake); + return PCH_TGP; + case INTEL_PCH_JSP_DEVICE_ID_TYPE: + drm_dbg_kms(display->drm, "Found Jasper Lake PCH\n"); + drm_WARN_ON(display->drm, !(display->platform.jasperlake || + display->platform.elkhartlake)); + /* JSP is ICP compatible */ + return PCH_ICP; + case INTEL_PCH_ADP_DEVICE_ID_TYPE: + case INTEL_PCH_ADP2_DEVICE_ID_TYPE: + case INTEL_PCH_ADP3_DEVICE_ID_TYPE: + case INTEL_PCH_ADP4_DEVICE_ID_TYPE: + drm_dbg_kms(display->drm, "Found Alder Lake PCH\n"); + drm_WARN_ON(display->drm, !display->platform.alderlake_s && + !display->platform.alderlake_p); + return PCH_ADP; + default: + return PCH_NONE; + } +} + +static bool intel_is_virt_pch(unsigned short id, + unsigned short svendor, unsigned short sdevice) +{ + return (id == INTEL_PCH_P2X_DEVICE_ID_TYPE || + id == INTEL_PCH_P3X_DEVICE_ID_TYPE || + (id == INTEL_PCH_QEMU_DEVICE_ID_TYPE && + svendor == PCI_SUBVENDOR_ID_REDHAT_QUMRANET && + sdevice == PCI_SUBDEVICE_ID_QEMU)); +} + +static void +intel_virt_detect_pch(const struct intel_display *display, + unsigned short *pch_id, enum intel_pch *pch_type) +{ + unsigned short id = 0; + + /* + * In a virtualized passthrough environment we can be in a + * setup where the ISA bridge is not able to be passed through. + * In this case, a south bridge can be emulated and we have to + * make an educated guess as to which PCH is really there. 
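[Editorial note, not part of the patch] The detection flow in the new intel_pch.c masks the ISA bridge's PCI device ID with INTEL_PCH_DEVICE_ID_MASK, maps it to an `enum intel_pch` value, and falls back to a platform-based guess in virtualized setups. The standalone sketch below models only the masking-and-mapping step, reusing a few of the ID prefixes defined in this file; the probe value is hypothetical and this is not the driver's detection code.

#include <stdio.h>

/* Device ID prefixes taken from the defines added in intel_pch.c. */
#define EXAMPLE_PCH_DEVICE_ID_MASK	0xff80
#define EXAMPLE_PCH_IBX_DEVICE_ID	0x3b00
#define EXAMPLE_PCH_CPT_DEVICE_ID	0x1c00
#define EXAMPLE_PCH_PPT_DEVICE_ID	0x1e00

/* Ordered like enum intel_pch, so ">=" comparisons express "this south
 * display or newer", as the driver's INTEL_PCH_TYPE() >= PCH_x checks do. */
enum example_pch { EXAMPLE_PCH_NONE = 0, EXAMPLE_PCH_IBX, EXAMPLE_PCH_CPT };

static enum example_pch example_pch_type(unsigned short id)
{
	switch (id) {
	case EXAMPLE_PCH_IBX_DEVICE_ID:
		return EXAMPLE_PCH_IBX;
	case EXAMPLE_PCH_CPT_DEVICE_ID:
	case EXAMPLE_PCH_PPT_DEVICE_ID:	/* PPT is CPT compatible */
		return EXAMPLE_PCH_CPT;
	default:
		return EXAMPLE_PCH_NONE;
	}
}

int main(void)
{
	unsigned short pci_device = 0x1e31;	/* hypothetical device ID */
	unsigned short id = pci_device & EXAMPLE_PCH_DEVICE_ID_MASK;
	enum example_pch pch = example_pch_type(id);

	printf("id 0x%04x -> pch %d (cpt-or-newer: %s)\n",
	       id, pch, pch >= EXAMPLE_PCH_CPT ? "yes" : "no");
	return 0;
}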
+ */ + + if (display->platform.alderlake_s || display->platform.alderlake_p) + id = INTEL_PCH_ADP_DEVICE_ID_TYPE; + else if (display->platform.tigerlake || display->platform.rocketlake) + id = INTEL_PCH_TGP_DEVICE_ID_TYPE; + else if (display->platform.jasperlake || display->platform.elkhartlake) + id = INTEL_PCH_MCC_DEVICE_ID_TYPE; + else if (display->platform.icelake) + id = INTEL_PCH_ICP_DEVICE_ID_TYPE; + else if (display->platform.coffeelake || + display->platform.cometlake) + id = INTEL_PCH_CNP_DEVICE_ID_TYPE; + else if (display->platform.kabylake || display->platform.skylake) + id = INTEL_PCH_SPT_DEVICE_ID_TYPE; + else if (display->platform.haswell_ult || + display->platform.broadwell_ult) + id = INTEL_PCH_LPT_LP_DEVICE_ID_TYPE; + else if (display->platform.haswell || display->platform.broadwell) + id = INTEL_PCH_LPT_DEVICE_ID_TYPE; + else if (DISPLAY_VER(display) == 6 || display->platform.ivybridge) + id = INTEL_PCH_CPT_DEVICE_ID_TYPE; + else if (DISPLAY_VER(display) == 5) + id = INTEL_PCH_IBX_DEVICE_ID_TYPE; + + if (id) + drm_dbg_kms(display->drm, "Assuming PCH ID %04x\n", id); + else + drm_dbg_kms(display->drm, "Assuming no PCH\n"); + + *pch_type = intel_pch_type(display, id); + + /* Sanity check virtual PCH id */ + if (drm_WARN_ON(display->drm, + id && *pch_type == PCH_NONE)) + id = 0; + + *pch_id = id; +} + +void intel_pch_detect(struct intel_display *display) +{ + struct pci_dev *pch = NULL; + unsigned short id; + enum intel_pch pch_type; + + pch_type = intel_pch_fake_for_south_display(display); + if (pch_type != PCH_NONE) { + display->pch_type = pch_type; + drm_dbg_kms(display->drm, + "PCH not involved in display, using fake PCH type %d for south display\n", + pch_type); + return; + } + + /* + * The reason to probe ISA bridge instead of Dev31:Fun0 is to + * make graphics device passthrough work easy for VMM, that only + * need to expose ISA bridge to let driver know the real hardware + * underneath. This is a requirement from virtualization team. + * + * In some virtualized environments (e.g. XEN), there is irrelevant + * ISA bridge in the system. To work reliably, we should scan through + * all the ISA bridge devices and check for the first match, instead + * of only checking the first one. + */ + while ((pch = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, pch))) { + if (pch->vendor != PCI_VENDOR_ID_INTEL) + continue; + + id = pch->device & INTEL_PCH_DEVICE_ID_MASK; + + pch_type = intel_pch_type(display, id); + if (pch_type != PCH_NONE) { + display->pch_type = pch_type; + break; + } else if (intel_is_virt_pch(id, pch->subsystem_vendor, + pch->subsystem_device)) { + intel_virt_detect_pch(display, &id, &pch_type); + display->pch_type = pch_type; + break; + } + } + + /* + * Use PCH_NOP (PCH but no South Display) for PCH platforms without + * display. + */ + if (pch && !HAS_DISPLAY(display)) { + drm_dbg_kms(display->drm, + "Display disabled, reverting to NOP PCH\n"); + display->pch_type = PCH_NOP; + } else if (!pch) { + if (i915_run_as_guest() && HAS_DISPLAY(display)) { + intel_virt_detect_pch(display, &id, &pch_type); + display->pch_type = pch_type; + } else { + drm_dbg_kms(display->drm, "No PCH found.\n"); + } + } + + pci_dev_put(pch); +} diff --git a/drivers/gpu/drm/i915/display/intel_pch.h b/drivers/gpu/drm/i915/display/intel_pch.h new file mode 100644 index 000000000000..cf4dab1b98bf --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_pch.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Intel Corporation. 
+ */ + +#ifndef __INTEL_PCH__ +#define __INTEL_PCH__ + +#include "intel_display_conversion.h" + +struct intel_display; + +/* + * Sorted by south display engine compatibility. + * If the new PCH comes with a south display engine that is not + * inherited from the latest item, please do not add it to the + * end. Instead, add it right after its "parent" PCH. + */ +enum intel_pch { + PCH_NOP = -1, /* PCH without south display */ + PCH_NONE = 0, /* No PCH present */ + PCH_IBX, /* Ibexpeak PCH */ + PCH_CPT, /* Cougarpoint/Pantherpoint PCH */ + PCH_LPT_H, /* Lynxpoint/Wildcatpoint H PCH */ + PCH_LPT_LP, /* Lynxpoint/Wildcatpoint LP PCH */ + PCH_SPT, /* Sunrisepoint/Kaby Lake PCH */ + PCH_CNP, /* Cannon/Comet Lake PCH */ + PCH_ICP, /* Ice Lake/Jasper Lake PCH */ + PCH_TGP, /* Tiger Lake/Mule Creek Canyon PCH */ + PCH_ADP, /* Alder Lake PCH */ + + /* Fake PCHs, functionality handled on the same PCI dev */ + PCH_DG1 = 1024, + PCH_DG2, + PCH_MTL, + PCH_LNL, +}; + +#define INTEL_PCH_TYPE(_display) (__to_intel_display(_display)->pch_type) +#define HAS_PCH_DG2(display) (INTEL_PCH_TYPE(display) == PCH_DG2) +#define HAS_PCH_ADP(display) (INTEL_PCH_TYPE(display) == PCH_ADP) +#define HAS_PCH_DG1(display) (INTEL_PCH_TYPE(display) == PCH_DG1) +#define HAS_PCH_TGP(display) (INTEL_PCH_TYPE(display) == PCH_TGP) +#define HAS_PCH_ICP(display) (INTEL_PCH_TYPE(display) == PCH_ICP) +#define HAS_PCH_CNP(display) (INTEL_PCH_TYPE(display) == PCH_CNP) +#define HAS_PCH_SPT(display) (INTEL_PCH_TYPE(display) == PCH_SPT) +#define HAS_PCH_LPT_H(display) (INTEL_PCH_TYPE(display) == PCH_LPT_H) +#define HAS_PCH_LPT_LP(display) (INTEL_PCH_TYPE(display) == PCH_LPT_LP) +#define HAS_PCH_LPT(display) (INTEL_PCH_TYPE(display) == PCH_LPT_H || \ + INTEL_PCH_TYPE(display) == PCH_LPT_LP) +#define HAS_PCH_CPT(display) (INTEL_PCH_TYPE(display) == PCH_CPT) +#define HAS_PCH_IBX(display) (INTEL_PCH_TYPE(display) == PCH_IBX) +#define HAS_PCH_NOP(display) (INTEL_PCH_TYPE(display) == PCH_NOP) +#define HAS_PCH_SPLIT(display) (INTEL_PCH_TYPE(display) != PCH_NONE) + +void intel_pch_detect(struct intel_display *display); + +#endif /* __INTEL_PCH__ */ diff --git a/drivers/gpu/drm/i915/display/intel_pch_display.c b/drivers/gpu/drm/i915/display/intel_pch_display.c index b909ed18a5b2..1743ebf551cb 100644 --- a/drivers/gpu/drm/i915/display/intel_pch_display.c +++ b/drivers/gpu/drm/i915/display/intel_pch_display.c @@ -3,8 +3,9 @@ * Copyright © 2021 Intel Corporation */ +#include <drm/drm_print.h> + #include "g4x_dp.h" -#include "i915_drv.h" #include "i915_reg.h" #include "intel_crt.h" #include "intel_crt_regs.h" @@ -23,17 +24,15 @@ bool intel_has_pch_trancoder(struct intel_display *display, enum pipe pch_transcoder) { - struct drm_i915_private *i915 = to_i915(display->drm); - - return HAS_PCH_IBX(i915) || HAS_PCH_CPT(i915) || - (HAS_PCH_LPT_H(i915) && pch_transcoder == PIPE_A); + return HAS_PCH_IBX(display) || HAS_PCH_CPT(display) || + (HAS_PCH_LPT_H(display) && pch_transcoder == PIPE_A); } enum pipe intel_crtc_pch_transcoder(struct intel_crtc *crtc) { - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc); - if (HAS_PCH_LPT(i915)) + if (HAS_PCH_LPT(display)) return PIPE_A; else return crtc->pipe; @@ -43,7 +42,6 @@ static void assert_pch_dp_disabled(struct intel_display *display, enum pipe pipe, enum port port, i915_reg_t dp_reg) { - struct drm_i915_private *dev_priv = to_i915(display->drm); enum pipe port_pipe; bool state; @@ -54,7 +52,7 @@ static void assert_pch_dp_disabled(struct 
intel_display *display, port_name(port), pipe_name(pipe)); INTEL_DISPLAY_STATE_WARN(display, - HAS_PCH_IBX(dev_priv) && !state && port_pipe == PIPE_B, + HAS_PCH_IBX(display) && !state && port_pipe == PIPE_B, "IBX PCH DP %c still using transcoder B\n", port_name(port)); } @@ -63,7 +61,6 @@ static void assert_pch_hdmi_disabled(struct intel_display *display, enum pipe pipe, enum port port, i915_reg_t hdmi_reg) { - struct drm_i915_private *dev_priv = to_i915(display->drm); enum pipe port_pipe; bool state; @@ -74,7 +71,7 @@ static void assert_pch_hdmi_disabled(struct intel_display *display, port_name(port), pipe_name(pipe)); INTEL_DISPLAY_STATE_WARN(display, - HAS_PCH_IBX(dev_priv) && !state && port_pipe == PIPE_B, + HAS_PCH_IBX(display) && !state && port_pipe == PIPE_B, "IBX PCH HDMI %c still using transcoder B\n", port_name(port)); } @@ -249,7 +246,6 @@ static void ilk_enable_pch_transcoder(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 val, pipeconf_val; @@ -261,7 +257,7 @@ static void ilk_enable_pch_transcoder(const struct intel_crtc_state *crtc_state) assert_fdi_tx_enabled(display, pipe); assert_fdi_rx_enabled(display, pipe); - if (HAS_PCH_CPT(dev_priv)) { + if (HAS_PCH_CPT(display)) { reg = TRANS_CHICKEN2(pipe); val = intel_de_read(display, reg); /* @@ -279,7 +275,7 @@ static void ilk_enable_pch_transcoder(const struct intel_crtc_state *crtc_state) val = intel_de_read(display, reg); pipeconf_val = intel_de_read(display, TRANSCONF(display, pipe)); - if (HAS_PCH_IBX(dev_priv)) { + if (HAS_PCH_IBX(display)) { /* Configure frame start delay to match the CPU */ val &= ~TRANS_FRAME_START_DELAY_MASK; val |= TRANS_FRAME_START_DELAY(crtc_state->framestart_delay - 1); @@ -298,7 +294,7 @@ static void ilk_enable_pch_transcoder(const struct intel_crtc_state *crtc_state) val &= ~TRANS_INTERLACE_MASK; if ((pipeconf_val & TRANSCONF_INTERLACE_MASK_ILK) == TRANSCONF_INTERLACE_IF_ID_ILK) { - if (HAS_PCH_IBX(dev_priv) && + if (HAS_PCH_IBX(display) && intel_crtc_has_type(crtc_state, INTEL_OUTPUT_SDVO)) val |= TRANS_INTERLACE_LEGACY_VSYNC_IBX; else @@ -316,7 +312,6 @@ static void ilk_enable_pch_transcoder(const struct intel_crtc_state *crtc_state) static void ilk_disable_pch_transcoder(struct intel_crtc *crtc) { struct intel_display *display = to_intel_display(crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; i915_reg_t reg; @@ -334,7 +329,7 @@ static void ilk_disable_pch_transcoder(struct intel_crtc *crtc) drm_err(display->drm, "failed to disable transcoder %c\n", pipe_name(pipe)); - if (HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_CPT(display)) /* Workaround: Clear the timing override chicken bit again. */ intel_de_rmw(display, TRANS_CHICKEN2(pipe), TRANS_CHICKEN2_TIMING_OVERRIDE, 0); @@ -366,7 +361,6 @@ void ilk_pch_enable(struct intel_atomic_state *state, struct intel_crtc *crtc) { struct intel_display *display = to_intel_display(crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); const struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); enum pipe pipe = crtc->pipe; @@ -381,7 +375,7 @@ void ilk_pch_enable(struct intel_atomic_state *state, * We need to program the right clock selection * before writing the pixel multiplier into the DPLL. 
*/ - if (HAS_PCH_CPT(dev_priv)) { + if (HAS_PCH_CPT(display)) { u32 sel; temp = intel_de_read(display, PCH_DPLL_SEL); @@ -417,7 +411,7 @@ void ilk_pch_enable(struct intel_atomic_state *state, intel_fdi_normal_train(crtc); /* For PCH DP, enable TRANS_DP_CTL */ - if (HAS_PCH_CPT(dev_priv) && + if (HAS_PCH_CPT(display) && intel_crtc_has_dp_encoder(crtc_state)) { const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; @@ -459,14 +453,13 @@ void ilk_pch_post_disable(struct intel_atomic_state *state, struct intel_crtc *crtc) { struct intel_display *display = to_intel_display(crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); const struct intel_crtc_state *old_crtc_state = intel_atomic_get_old_crtc_state(state, crtc); enum pipe pipe = crtc->pipe; ilk_disable_pch_transcoder(crtc); - if (HAS_PCH_CPT(dev_priv)) { + if (HAS_PCH_CPT(display)) { /* disable TRANS_DP_CTL */ intel_de_rmw(display, TRANS_DP_CTL(pipe), TRANS_DP_OUTPUT_ENABLE | TRANS_DP_PORT_SEL_MASK, @@ -503,7 +496,6 @@ void ilk_pch_get_config(struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_shared_dpll *pll; enum pipe pipe = crtc->pipe; enum intel_dpll_id pll_id; @@ -522,7 +514,7 @@ void ilk_pch_get_config(struct intel_crtc_state *crtc_state) intel_cpu_transcoder_get_m1_n1(crtc, crtc_state->cpu_transcoder, &crtc_state->fdi_m_n); - if (HAS_PCH_IBX(dev_priv)) { + if (HAS_PCH_IBX(display)) { /* * The pipe->pch transcoder and pch transcoder->pll * mapping is fixed. @@ -646,8 +638,6 @@ void lpt_pch_get_config(struct intel_crtc_state *crtc_state) void intel_pch_sanitize(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - - if (HAS_PCH_IBX(i915)) + if (HAS_PCH_IBX(display)) ibx_sanitize_pch_ports(display); } diff --git a/drivers/gpu/drm/i915/display/intel_pch_refclk.c b/drivers/gpu/drm/i915/display/intel_pch_refclk.c index 1307a478861a..693b90e3dfc3 100644 --- a/drivers/gpu/drm/i915/display/intel_pch_refclk.c +++ b/drivers/gpu/drm/i915/display/intel_pch_refclk.c @@ -281,7 +281,7 @@ static void lpt_enable_clkout_dp(struct intel_display *display, if (drm_WARN(display->drm, with_fdi && !with_spread, "FDI requires downspread\n")) with_spread = true; - if (drm_WARN(display->drm, HAS_PCH_LPT_LP(dev_priv) && + if (drm_WARN(display->drm, HAS_PCH_LPT_LP(display) && with_fdi, "LP PCH doesn't have FDI\n")) with_fdi = false; @@ -303,7 +303,7 @@ static void lpt_enable_clkout_dp(struct intel_display *display, lpt_fdi_program_mphy(display); } - reg = HAS_PCH_LPT_LP(dev_priv) ? SBI_GEN0 : SBI_DBUFF0; + reg = HAS_PCH_LPT_LP(display) ? SBI_GEN0 : SBI_DBUFF0; tmp = intel_sbi_read(dev_priv, reg, SBI_ICLK); tmp |= SBI_GEN0_CFG_BUFFENABLE_DISABLE; intel_sbi_write(dev_priv, reg, tmp, SBI_ICLK); @@ -319,7 +319,7 @@ void lpt_disable_clkout_dp(struct intel_display *display) intel_sbi_lock(dev_priv); - reg = HAS_PCH_LPT_LP(dev_priv) ? SBI_GEN0 : SBI_DBUFF0; + reg = HAS_PCH_LPT_LP(display) ? 
SBI_GEN0 : SBI_DBUFF0; tmp = intel_sbi_read(dev_priv, reg, SBI_ICLK); tmp &= ~SBI_GEN0_CFG_BUFFENABLE_DISABLE; intel_sbi_write(dev_priv, reg, tmp, SBI_ICLK); @@ -498,7 +498,6 @@ static void lpt_init_pch_refclk(struct intel_display *display) static void ilk_init_pch_refclk(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_encoder *encoder; struct intel_shared_dpll *pll; int i; @@ -527,7 +526,7 @@ static void ilk_init_pch_refclk(struct intel_display *display) } } - if (HAS_PCH_IBX(dev_priv)) { + if (HAS_PCH_IBX(display)) { has_ck505 = display->vbt.display_clock_mode; can_ssc = has_ck505; } else { @@ -678,10 +677,8 @@ static void ilk_init_pch_refclk(struct intel_display *display) */ void intel_init_pch_refclk(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - - if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_IBX(display) || HAS_PCH_CPT(display)) ilk_init_pch_refclk(display); - else if (HAS_PCH_LPT(dev_priv)) + else if (HAS_PCH_LPT(display)) lpt_init_pch_refclk(display); } diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index b0c4892775ce..c00d9184c586 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -53,9 +53,11 @@ intel_reuse_initial_plane_obj(struct intel_crtc *this, } static enum intel_memory_type -initial_plane_memory_type(struct drm_i915_private *i915) +initial_plane_memory_type(struct intel_display *display) { - if (IS_DGFX(i915)) + struct drm_i915_private *i915 = to_i915(display->drm); + + if (display->platform.dgfx) return INTEL_MEMORY_LOCAL; else if (HAS_LMEMBAR_SMEM_STOLEN(i915)) return INTEL_MEMORY_STOLEN_LOCAL; @@ -75,7 +77,7 @@ initial_plane_phys(struct intel_display *display, dma_addr_t dma_addr; u32 base; - mem_type = initial_plane_memory_type(i915); + mem_type = initial_plane_memory_type(display); mem = intel_memory_region_by_type(i915, mem_type); if (!mem) { drm_dbg_kms(display->drm, diff --git a/drivers/gpu/drm/i915/display/intel_pmdemand.c b/drivers/gpu/drm/i915/display/intel_pmdemand.c index d22b5469672d..1253376c7654 100644 --- a/drivers/gpu/drm/i915/display/intel_pmdemand.c +++ b/drivers/gpu/drm/i915/display/intel_pmdemand.c @@ -478,13 +478,34 @@ static bool intel_pmdemand_req_complete(struct intel_display *display) XELPDP_PMDEMAND_REQ_ENABLE); } -static void intel_pmdemand_wait(struct intel_display *display) +static void intel_pmdemand_poll(struct intel_display *display) { - if (!wait_event_timeout(display->pmdemand.waitqueue, - intel_pmdemand_req_complete(display), - msecs_to_jiffies_timeout(10))) + const unsigned int timeout_ms = 10; + u32 status; + int ret; + + ret = intel_de_wait_custom(display, XELPDP_INITIATE_PMDEMAND_REQUEST(1), + XELPDP_PMDEMAND_REQ_ENABLE, 0, + 50, timeout_ms, &status); + + if (ret == -ETIMEDOUT) drm_err(display->drm, - "timed out waiting for Punit PM Demand Response\n"); + "timed out waiting for Punit PM Demand Response within %ums (status 0x%08x)\n", + timeout_ms, status); +} + +static void intel_pmdemand_wait(struct intel_display *display) +{ + /* Wa_14024400148 For lnl use polling method */ + if (DISPLAY_VER(display) == 20) { + intel_pmdemand_poll(display); + } else { + if (!wait_event_timeout(display->pmdemand.waitqueue, + intel_pmdemand_req_complete(display), + msecs_to_jiffies_timeout(10))) + drm_err(display->drm, + "timed out waiting for Punit PM Demand Response\n"); + } } /* 
Required to be programmed during Display Init Sequences. */ diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index 4d4e2b9f5f2d..05e1e5c7e8b7 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -350,21 +350,19 @@ vlv_initial_power_sequencer_setup(struct intel_dp *intel_dp) static int intel_num_pps(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - if (display->platform.valleyview || display->platform.cherryview) return 2; if (display->platform.geminilake || display->platform.broxton) return 2; - if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + if (INTEL_PCH_TYPE(display) >= PCH_MTL) return 2; - if (INTEL_PCH_TYPE(i915) >= PCH_DG1) + if (INTEL_PCH_TYPE(display) >= PCH_DG1) return 1; - if (INTEL_PCH_TYPE(i915) >= PCH_ICP) + if (INTEL_PCH_TYPE(display) >= PCH_ICP) return 2; return 1; @@ -373,11 +371,10 @@ static int intel_num_pps(struct intel_display *display) static bool intel_pps_is_valid(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *i915 = to_i915(display->drm); if (intel_dp->pps.pps_idx == 1 && - INTEL_PCH_TYPE(i915) >= PCH_ICP && - INTEL_PCH_TYPE(i915) <= PCH_ADP) + INTEL_PCH_TYPE(display) >= PCH_ICP && + INTEL_PCH_TYPE(display) <= PCH_ADP) return intel_de_read(display, SOUTH_CHICKEN1) & ICP_SECOND_PPS_IO_SELECT; return true; @@ -499,7 +496,6 @@ static void intel_pps_get_registers(struct intel_dp *intel_dp, struct pps_registers *regs) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *dev_priv = to_i915(display->drm); int pps_idx; memset(regs, 0, sizeof(*regs)); @@ -518,7 +514,7 @@ static void intel_pps_get_registers(struct intel_dp *intel_dp, /* Cycle delay moved from PP_DIVISOR to PP_CONTROL */ if (display->platform.geminilake || display->platform.broxton || - INTEL_PCH_TYPE(dev_priv) >= PCH_CNP) + INTEL_PCH_TYPE(display) >= PCH_CNP) regs->pp_div = INVALID_MMIO_REG; else regs->pp_div = PP_DIVISOR(display, pps_idx); @@ -1591,7 +1587,6 @@ static void pps_init_delays(struct intel_dp *intel_dp) static void pps_init_registers(struct intel_dp *intel_dp, bool force_disable_vdd) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 pp_on, pp_off, port_sel = 0; int div = DISPLAY_RUNTIME_INFO(display)->rawclk_freq / 1000; struct pps_registers regs; @@ -1638,7 +1633,7 @@ static void pps_init_registers(struct intel_dp *intel_dp, bool force_disable_vdd * power sequencer any more. 
*/ if (display->platform.valleyview || display->platform.cherryview) { port_sel = PANEL_PORT_SELECT_VLV(port); - } else if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) { + } else if (HAS_PCH_IBX(display) || HAS_PCH_CPT(display)) { switch (port) { case PORT_A: port_sel = PANEL_PORT_SELECT_DPA; @@ -1791,9 +1786,7 @@ void intel_pps_unlock_regs_wa(struct intel_display *display) void intel_pps_setup(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - - if (HAS_PCH_SPLIT(i915) || display->platform.geminilake || display->platform.broxton) + if (HAS_PCH_SPLIT(display) || display->platform.geminilake || display->platform.broxton) display->pps.mmio_base = PCH_PPS_BASE; else if (display->platform.valleyview || display->platform.cherryview) display->pps.mmio_base = VLV_PPS_BASE; @@ -1836,7 +1829,6 @@ void intel_pps_connector_debugfs_add(struct intel_connector *connector) void assert_pps_unlocked(struct intel_display *display, enum pipe pipe) { - struct drm_i915_private *dev_priv = to_i915(display->drm); i915_reg_t pp_reg; u32 val; enum pipe panel_pipe = INVALID_PIPE; @@ -1845,7 +1837,7 @@ void assert_pps_unlocked(struct intel_display *display, enum pipe pipe) if (drm_WARN_ON(display->drm, HAS_DDI(display))) return; - if (HAS_PCH_SPLIT(dev_priv)) { + if (HAS_PCH_SPLIT(display)) { u32 port_sel; pp_reg = PP_CONTROL(display, 0); diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index eef48c014112..ccd66bbc72f7 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -26,6 +26,7 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_damage_helper.h> #include <drm/drm_debugfs.h> +#include <drm/drm_vblank.h> #include "i915_drv.h" #include "i915_reg.h" @@ -38,6 +39,7 @@ #include "intel_display_irq.h" #include "intel_display_rpm.h" #include "intel_display_types.h" +#include "intel_dmc.h" #include "intel_dp.h" #include "intel_dp_aux.h" #include "intel_frontbuffer.h" @@ -46,6 +48,7 @@ #include "intel_psr_regs.h" #include "intel_snps_phy.h" #include "intel_vblank.h" +#include "intel_vrr.h" #include "skl_universal_plane.h" /** @@ -794,32 +797,9 @@ static void _psr_enable_sink(struct intel_dp *intel_dp, drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, val); } -static void intel_psr_enable_sink_alpm(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) -{ - u8 val; - - /* - * eDP Panel Replay uses always ALPM - * PSR2 uses ALPM but PSR1 doesn't - */ - if (!intel_dp_is_edp(intel_dp) || (!crtc_state->has_panel_replay && - !crtc_state->has_sel_update)) - return; - - val = DP_ALPM_ENABLE | DP_ALPM_LOCK_ERROR_IRQ_HPD_ENABLE; - - if (crtc_state->has_panel_replay) - val |= DP_ALPM_MODE_AUX_LESS; - - drm_dp_dpcd_writeb(&intel_dp->aux, DP_RECEIVER_ALPM_CONFIG, val); -} - static void intel_psr_enable_sink(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { - intel_psr_enable_sink_alpm(intel_dp, crtc_state); - crtc_state->has_panel_replay ? 
_panel_replay_enable_sink(intel_dp, crtc_state) : _psr_enable_sink(intel_dp, crtc_state); @@ -905,6 +885,18 @@ static u8 psr_compute_idle_frames(struct intel_dp *intel_dp) return idle_frames; } +static bool is_dc5_dc6_blocked(struct intel_dp *intel_dp) +{ + struct intel_display *display = to_intel_display(intel_dp); + u32 current_dc_state = intel_display_power_get_current_dc_state(display); + struct drm_vblank_crtc *vblank = &display->drm->vblank[intel_dp->psr.pipe]; + + return (current_dc_state != DC_STATE_EN_UPTO_DC5 && + current_dc_state != DC_STATE_EN_UPTO_DC6) || + intel_dp->psr.active_non_psr_pipes || + READ_ONCE(vblank->enabled); +} + static void hsw_activate_psr1(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); @@ -933,6 +925,14 @@ static void hsw_activate_psr1(struct intel_dp *intel_dp) intel_de_rmw(display, psr_ctl_reg(display, cpu_transcoder), ~EDP_PSR_RESTORE_PSR_ACTIVE_CTX_MASK, val); + + /* Wa_16025596647 */ + if ((DISPLAY_VER(display) == 20 || + IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) && + is_dc5_dc6_blocked(intel_dp)) + intel_dmc_start_pkgc_exit_at_start_of_undelayed_vblank(display, + intel_dp->psr.pipe, + true); } static u32 intel_psr2_get_tp_time(struct intel_dp *intel_dp) @@ -1014,8 +1014,16 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) enum transcoder cpu_transcoder = intel_dp->psr.transcoder; u32 val = EDP_PSR2_ENABLE; u32 psr_val = 0; + u8 idle_frames; - val |= EDP_PSR2_IDLE_FRAMES(psr_compute_idle_frames(intel_dp)); + /* Wa_16025596647 */ + if ((DISPLAY_VER(display) == 20 || + IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) && + is_dc5_dc6_blocked(intel_dp)) + idle_frames = 0; + else + idle_frames = psr_compute_idle_frames(intel_dp); + val |= EDP_PSR2_IDLE_FRAMES(idle_frames); if (DISPLAY_VER(display) < 14 && !display->platform.alderlake_p) val |= EDP_SU_TRACK_ENABLE; @@ -1649,6 +1657,9 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, { struct intel_display *display = to_intel_display(intel_dp); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; + struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state); + struct intel_crtc *crtc; + u8 active_pipes = 0; if (!psr_global_enabled(intel_dp)) { drm_dbg_kms(display->drm, "PSR disabled by flag\n"); @@ -1702,6 +1713,24 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, drm_dbg_kms(display->drm, "PSR disabled to workaround PSR FSM hang issue\n"); } + + /* Rest is for Wa_16025596647 */ + if (DISPLAY_VER(display) != 20 && + !IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) + return; + + /* Not needed by Panel Replay */ + if (crtc_state->has_panel_replay) + return; + + /* We ignore possible secondary PSR/Panel Replay capable eDP */ + for_each_intel_crtc(display->drm, crtc) + active_pipes |= crtc->active ? BIT(crtc->pipe) : 0; + + active_pipes = intel_calc_active_pipes(state, active_pipes); + + crtc_state->active_non_psr_pipes = active_pipes & + ~BIT(to_intel_crtc(crtc_state->uapi.crtc)->pipe); } void intel_psr_get_config(struct intel_encoder *encoder, @@ -1893,9 +1922,6 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, intel_dp->psr.psr2_sel_fetch_enabled ? 
IGNORE_PSR2_HW_TRACKING : 0); - if (intel_dp_is_edp(intel_dp)) - intel_alpm_configure(intel_dp, crtc_state); - /* * Wa_16013835468 * Wa_14015648006 @@ -1930,6 +1956,12 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, intel_de_rmw(display, CLKGATE_DIS_MISC, 0, CLKGATE_DIS_MISC_DMASC_GATING_DIS); } + + /* Wa_16025596647 */ + if ((DISPLAY_VER(display) == 20 || + IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) && + !intel_dp->psr.panel_replay_enabled) + intel_dmc_block_pkgc(display, intel_dp->psr.pipe, true); } static bool psr_interrupt_error_check(struct intel_dp *intel_dp) @@ -1985,6 +2017,7 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp, intel_dp->psr.psr2_sel_fetch_cff_enabled = false; intel_dp->psr.req_psr2_sdp_prior_scanline = crtc_state->req_psr2_sdp_prior_scanline; + intel_dp->psr.active_non_psr_pipes = crtc_state->active_non_psr_pipes; if (!psr_interrupt_error_check(intel_dp)) return; @@ -2060,6 +2093,12 @@ static void intel_psr_exit(struct intel_dp *intel_dp) drm_WARN_ON(display->drm, !(val & EDP_PSR2_ENABLE)); } else { + if (DISPLAY_VER(display) == 20 || + IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) + intel_dmc_start_pkgc_exit_at_start_of_undelayed_vblank(display, + intel_dp->psr.pipe, + false); + val = intel_de_rmw(display, psr_ctl_reg(display, cpu_transcoder), EDP_PSR_ENABLE, 0); @@ -2133,17 +2172,6 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp) if (intel_dp_is_edp(intel_dp)) intel_snps_phy_update_psr_power_state(&dp_to_dig_port(intel_dp)->base, false); - /* Panel Replay on eDP is always using ALPM aux less. */ - if (intel_dp->psr.panel_replay_enabled && intel_dp_is_edp(intel_dp)) { - intel_de_rmw(display, ALPM_CTL(display, cpu_transcoder), - ALPM_CTL_ALPM_ENABLE | - ALPM_CTL_ALPM_AUX_LESS_ENABLE, 0); - - intel_de_rmw(display, - PORT_ALPM_CTL(cpu_transcoder), - PORT_ALPM_CTL_ALPM_AUX_LESS_ENABLE, 0); - } - /* Disable PSR on Sink */ if (!intel_dp->psr.panel_replay_enabled) { drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, 0); @@ -2153,12 +2181,19 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp) DP_RECEIVER_ALPM_CONFIG, 0); } + /* Wa_16025596647 */ + if ((DISPLAY_VER(display) == 20 || + IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) && + !intel_dp->psr.panel_replay_enabled) + intel_dmc_block_pkgc(display, intel_dp->psr.pipe, false); + intel_dp->psr.enabled = false; intel_dp->psr.panel_replay_enabled = false; intel_dp->psr.sel_update_enabled = false; intel_dp->psr.psr2_sel_fetch_enabled = false; intel_dp->psr.su_region_et_enabled = false; intel_dp->psr.psr2_sel_fetch_cff_enabled = false; + intel_dp->psr.active_non_psr_pipes = 0; } /** @@ -2254,17 +2289,20 @@ out: } /** - * intel_psr_needs_block_dc_vblank - Check if block dc entry is needed + * intel_psr_needs_vblank_notification - Check if PSR need vblank enable/disable + * notification. * @crtc_state: CRTC status * * We need to block DC6 entry in case of Panel Replay as enabling VBI doesn't * prevent it in case of Panel Replay. Panel Replay switches main link off on * DC entry. This means vblank interrupts are not fired and is a problem if - * user-space is polling for vblank events. + * user-space is polling for vblank events. Also Wa_16025596647 needs + * information when vblank is enabled/disabled. 
*/ -bool intel_psr_needs_block_dc_vblank(const struct intel_crtc_state *crtc_state) +bool intel_psr_needs_vblank_notification(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_display *display = to_intel_display(crtc_state); struct intel_encoder *encoder; for_each_encoder_on_crtc(crtc->base.dev, &crtc->base, encoder) { @@ -2275,8 +2313,15 @@ bool intel_psr_needs_block_dc_vblank(const struct intel_crtc_state *crtc_state) intel_dp = enc_to_intel_dp(encoder); - if (intel_dp_is_edp(intel_dp) && - CAN_PANEL_REPLAY(intel_dp)) + if (!intel_dp_is_edp(intel_dp)) + continue; + + if (CAN_PANEL_REPLAY(intel_dp)) + return true; + + if ((DISPLAY_VER(display) == 20 || + IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) && + CAN_PSR(intel_dp)) return true; } @@ -2304,6 +2349,53 @@ void intel_psr_trigger_frame_change_event(struct intel_dsb *dsb, CURSURFLIVE(display, crtc->pipe), 0); } +/** + * intel_psr_min_vblank_delay - Minimum vblank delay needed by PSR + * @crtc_state: the crtc state + * + * Return minimum vblank delay needed by PSR. + */ +int intel_psr_min_vblank_delay(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + + if (!crtc_state->has_psr) + return 0; + + /* Wa_14015401596 */ + if (intel_vrr_possible(crtc_state) && IS_DISPLAY_VER(display, 13, 14)) + return 1; + + /* Rest is for SRD_STATUS needed on LunarLake and onwards */ + if (DISPLAY_VER(display) < 20) + return 0; + + /* + * Comment on SRD_STATUS register in Bspec for LunarLake and onwards: + * + * To deterministically capture the transition of the state machine + * going from SRDOFFACK to IDLE, the delayed V. Blank should be at least + * one line after the non-delayed V. Blank. + * + * Legacy TG: TRANS_SET_CONTEXT_LATENCY > 0 + * VRR TG: TRANS_VRR_CTL[ VRR Guardband ] < (TRANS_VRR_VMAX[ VRR Vmax ] + * - TRANS_VTOTAL[ Vertical Active ]) + * + * SRD_STATUS is used only by PSR1 on PantherLake. + * SRD_STATUS is used by PSR1 and Panel Replay DP on LunarLake. + */ + + if (DISPLAY_VER(display) >= 30 && (crtc_state->has_panel_replay || + crtc_state->has_sel_update)) + return 0; + else if (DISPLAY_VER(display) < 30 && (crtc_state->has_sel_update || + intel_crtc_has_type(crtc_state, + INTEL_OUTPUT_EDP))) + return 0; + else + return 1; +} + static u32 man_trk_ctl_enable_bit_get(struct intel_display *display) { return display->platform.alderlake_p || DISPLAY_VER(display) >= 14 ? 
0 : @@ -3398,29 +3490,15 @@ static int psr_get_status_and_error_status(struct intel_dp *intel_dp, static void psr_alpm_check(struct intel_dp *intel_dp) { - struct intel_display *display = to_intel_display(intel_dp); - struct drm_dp_aux *aux = &intel_dp->aux; struct intel_psr *psr = &intel_dp->psr; - u8 val; - int r; if (!psr->sel_update_enabled) return; - r = drm_dp_dpcd_readb(aux, DP_RECEIVER_ALPM_STATUS, &val); - if (r != 1) { - drm_err(display->drm, "Error reading ALPM status\n"); - return; - } - - if (val & DP_ALPM_LOCK_TIMEOUT_ERROR) { + if (intel_alpm_get_error(intel_dp)) { intel_psr_disable_locked(intel_dp); psr->sink_not_reliable = true; - drm_dbg_kms(display->drm, - "ALPM lock timeout error, disabling PSR\n"); - - /* Clearing error */ - drm_dp_dpcd_writeb(aux, DP_RECEIVER_ALPM_STATUS, val); + intel_alpm_disable(intel_dp); } } @@ -3605,6 +3683,168 @@ void intel_psr_unlock(const struct intel_crtc_state *crtc_state) } } +/* Wa_16025596647 */ +static void intel_psr_apply_underrun_on_idle_wa_locked(struct intel_dp *intel_dp) +{ + struct intel_display *display = to_intel_display(intel_dp); + bool dc5_dc6_blocked; + + if (!intel_dp->psr.active) + return; + + dc5_dc6_blocked = is_dc5_dc6_blocked(intel_dp); + + if (intel_dp->psr.sel_update_enabled) + psr2_program_idle_frames(intel_dp, dc5_dc6_blocked ? 0 : + psr_compute_idle_frames(intel_dp)); + else + intel_dmc_start_pkgc_exit_at_start_of_undelayed_vblank(display, + intel_dp->psr.pipe, + dc5_dc6_blocked); +} + +static void psr_dc5_dc6_wa_work(struct work_struct *work) +{ + struct intel_display *display = container_of(work, typeof(*display), + psr_dc5_dc6_wa_work); + struct intel_encoder *encoder; + + for_each_intel_encoder_with_psr(display->drm, encoder) { + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + mutex_lock(&intel_dp->psr.lock); + + if (intel_dp->psr.enabled && !intel_dp->psr.panel_replay_enabled) + intel_psr_apply_underrun_on_idle_wa_locked(intel_dp); + + mutex_unlock(&intel_dp->psr.lock); + } +} + +/** + * intel_psr_notify_dc5_dc6 - Notify PSR about enable/disable dc5/dc6 + * @display: intel atomic state + * + * This is targeted for underrun on idle PSR HW bug (Wa_16025596647) to schedule + * psr_dc5_dc6_wa_work used for applying/removing the workaround. + */ +void intel_psr_notify_dc5_dc6(struct intel_display *display) +{ + if (DISPLAY_VER(display) != 20 && + !IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) + return; + + schedule_work(&display->psr_dc5_dc6_wa_work); +} + +/** + * intel_psr_dc5_dc6_wa_init - Init work for underrun on idle PSR HW bug wa + * @display: intel atomic state + * + * This is targeted for underrun on idle PSR HW bug (Wa_16025596647) to init + * psr_dc5_dc6_wa_work used for applying the workaround. 
+ */ +void intel_psr_dc5_dc6_wa_init(struct intel_display *display) +{ + if (DISPLAY_VER(display) != 20 && + !IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) + return; + + INIT_WORK(&display->psr_dc5_dc6_wa_work, psr_dc5_dc6_wa_work); +} + +/** + * intel_psr_notify_pipe_change - Notify PSR about enable/disable of a pipe + * @state: intel atomic state + * @crtc: intel crtc + * @enable: enable/disable + * + * This is targeted for underrun on idle PSR HW bug (Wa_16025596647) to apply + * remove the workaround when pipe is getting enabled/disabled + */ +void intel_psr_notify_pipe_change(struct intel_atomic_state *state, + struct intel_crtc *crtc, bool enable) +{ + struct intel_display *display = to_intel_display(state); + struct intel_encoder *encoder; + + if (DISPLAY_VER(display) != 20 && + !IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) + return; + + for_each_intel_encoder_with_psr(display->drm, encoder) { + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + u8 active_non_psr_pipes; + + mutex_lock(&intel_dp->psr.lock); + + if (!intel_dp->psr.enabled || intel_dp->psr.panel_replay_enabled) + goto unlock; + + active_non_psr_pipes = intel_dp->psr.active_non_psr_pipes; + + if (enable) + active_non_psr_pipes |= BIT(crtc->pipe); + else + active_non_psr_pipes &= ~BIT(crtc->pipe); + + if (active_non_psr_pipes == intel_dp->psr.active_non_psr_pipes) + goto unlock; + + if ((enable && intel_dp->psr.active_non_psr_pipes) || + (!enable && !intel_dp->psr.active_non_psr_pipes)) { + intel_dp->psr.active_non_psr_pipes = active_non_psr_pipes; + goto unlock; + } + + intel_dp->psr.active_non_psr_pipes = active_non_psr_pipes; + + intel_psr_apply_underrun_on_idle_wa_locked(intel_dp); +unlock: + mutex_unlock(&intel_dp->psr.lock); + } +} + +/** + * intel_psr_notify_vblank_enable_disable - Notify PSR about enable/disable of vblank + * @display: intel display struct + * @enable: enable/disable + * + * This is targeted for underrun on idle PSR HW bug (Wa_16025596647) to apply + * remove the workaround when vblank is getting enabled/disabled + */ +void intel_psr_notify_vblank_enable_disable(struct intel_display *display, + bool enable) +{ + struct intel_encoder *encoder; + + for_each_intel_encoder_with_psr(display->drm, encoder) { + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + mutex_lock(&intel_dp->psr.lock); + if (intel_dp->psr.panel_replay_enabled) { + mutex_unlock(&intel_dp->psr.lock); + break; + } + + if (intel_dp->psr.enabled) + intel_psr_apply_underrun_on_idle_wa_locked(intel_dp); + + mutex_unlock(&intel_dp->psr.lock); + return; + } + + /* + * NOTE: intel_display_power_set_target_dc_state is used + * only by PSR * code for DC3CO handling. DC3CO target + * state is currently disabled in * PSR code. If DC3CO + * is taken into use we need take that into account here + * as well. + */ + intel_display_power_set_target_dc_state(display, enable ? 
DC_STATE_DISABLE : + DC_STATE_EN_UPTO_DC6); +} + static void psr_source_status(struct intel_dp *intel_dp, struct seq_file *m) { @@ -3977,3 +4217,13 @@ void intel_psr_connector_debugfs_add(struct intel_connector *connector) debugfs_create_file("i915_psr_status", 0444, root, connector, &i915_psr_status_fops); } + +bool intel_psr_needs_alpm(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) +{ + /* + * eDP Panel Replay uses always ALPM + * PSR2 uses ALPM but PSR1 doesn't + */ + return intel_dp_is_edp(intel_dp) && (crtc_state->has_sel_update || + crtc_state->has_panel_replay); +} diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index a43a374cff55..73c3fa40844b 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -59,7 +59,13 @@ void intel_psr2_program_trans_man_trk_ctl(struct intel_dsb *dsb, const struct intel_crtc_state *crtc_state); void intel_psr_pause(struct intel_dp *intel_dp); void intel_psr_resume(struct intel_dp *intel_dp); -bool intel_psr_needs_block_dc_vblank(const struct intel_crtc_state *crtc_state); +bool intel_psr_needs_vblank_notification(const struct intel_crtc_state *crtc_state); +void intel_psr_notify_pipe_change(struct intel_atomic_state *state, + struct intel_crtc *crtc, bool enable); +void intel_psr_notify_dc5_dc6(struct intel_display *display); +void intel_psr_dc5_dc6_wa_init(struct intel_display *display); +void intel_psr_notify_vblank_enable_disable(struct intel_display *display, + bool enable); bool intel_psr_link_ok(struct intel_dp *intel_dp); void intel_psr_lock(const struct intel_crtc_state *crtc_state); @@ -67,7 +73,9 @@ void intel_psr_unlock(const struct intel_crtc_state *crtc_state); void intel_psr_trigger_frame_change_event(struct intel_dsb *dsb, struct intel_atomic_state *state, struct intel_crtc *crtc); +int intel_psr_min_vblank_delay(const struct intel_crtc_state *crtc_state); void intel_psr_connector_debugfs_add(struct intel_connector *connector); void intel_psr_debugfs_register(struct intel_display *display); +bool intel_psr_needs_alpm(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); #endif /* __INTEL_PSR_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 757b9ce7e3b1..8a38df2c0283 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -36,9 +36,9 @@ #include <drm/drm_crtc.h> #include <drm/drm_edid.h> #include <drm/drm_eld.h> +#include <drm/drm_print.h> #include <drm/drm_probe_helper.h> -#include "i915_drv.h" #include "i915_reg.h" #include "intel_atomic.h" #include "intel_audio.h" @@ -214,18 +214,17 @@ intel_sdvo_create_enhance_property(struct intel_sdvo *intel_sdvo, static void intel_sdvo_write_sdvox(struct intel_sdvo *intel_sdvo, u32 val) { struct intel_display *display = to_intel_display(&intel_sdvo->base); - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 bval = val, cval = val; int i; - if (HAS_PCH_SPLIT(dev_priv)) { + if (HAS_PCH_SPLIT(display)) { intel_de_write(display, intel_sdvo->sdvo_reg, val); intel_de_posting_read(display, intel_sdvo->sdvo_reg); /* * HW workaround, need to write this twice for issue * that may result in first write getting masked. 
*/ - if (HAS_PCH_IBX(dev_priv)) { + if (HAS_PCH_IBX(display)) { intel_de_write(display, intel_sdvo->sdvo_reg, val); intel_de_posting_read(display, intel_sdvo->sdvo_reg); } @@ -1360,14 +1359,13 @@ static int intel_sdvo_compute_config(struct intel_encoder *encoder, struct drm_connector_state *conn_state) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *i915 = to_i915(encoder->base.dev); struct intel_sdvo *intel_sdvo = to_sdvo(encoder); struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(conn_state->connector); struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; struct drm_display_mode *mode = &pipe_config->hw.mode; - if (HAS_PCH_SPLIT(i915)) { + if (HAS_PCH_SPLIT(display)) { pipe_config->has_pch_encoder = true; if (!intel_fdi_compute_pipe_bpp(pipe_config)) return -EINVAL; @@ -1527,7 +1525,6 @@ static void intel_sdvo_pre_enable(struct intel_atomic_state *state, const struct drm_connector_state *conn_state) { struct intel_display *display = to_intel_display(intel_encoder); - struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; const struct intel_sdvo_connector_state *sdvo_state = @@ -1634,7 +1631,7 @@ static void intel_sdvo_pre_enable(struct intel_atomic_state *state, sdvox |= (9 << 19) | SDVO_BORDER_ENABLE; } - if (HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_CPT(display)) sdvox |= SDVO_PIPE_SEL_CPT(crtc->pipe); else sdvox |= SDVO_PIPE_SEL(crtc->pipe); @@ -1670,13 +1667,12 @@ static bool intel_sdvo_connector_get_hw_state(struct intel_connector *connector) bool intel_sdvo_port_enabled(struct intel_display *display, i915_reg_t sdvo_reg, enum pipe *pipe) { - struct drm_i915_private *dev_priv = to_i915(display->drm); u32 val; val = intel_de_read(display, sdvo_reg); /* asserts want to know the pipe even if the port is disabled */ - if (HAS_PCH_CPT(dev_priv)) + if (HAS_PCH_CPT(display)) *pipe = (val & SDVO_PIPE_SEL_MASK_CPT) >> SDVO_PIPE_SEL_SHIFT_CPT; else if (display->platform.cherryview) *pipe = (val & SDVO_PIPE_SEL_MASK_CHV) >> SDVO_PIPE_SEL_SHIFT_CHV; @@ -1841,7 +1837,6 @@ static void intel_disable_sdvo(struct intel_atomic_state *state, const struct drm_connector_state *conn_state) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_sdvo *intel_sdvo = to_sdvo(encoder); struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); u32 temp; @@ -1861,7 +1856,7 @@ static void intel_disable_sdvo(struct intel_atomic_state *state, * to transcoder A after disabling it to allow the * matching DP port to be enabled on transcoder A. */ - if (HAS_PCH_IBX(dev_priv) && crtc->pipe == PIPE_B) { + if (HAS_PCH_IBX(display) && crtc->pipe == PIPE_B) { /* * We get CPU/PCH FIFO underruns on the other pipe when * doing the workaround. Sweep them under the rug. 
@@ -3367,9 +3362,7 @@ intel_sdvo_init_ddc_proxy(struct intel_sdvo_ddc *ddc, static bool is_sdvo_port_valid(struct intel_display *display, enum port port) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - - if (HAS_PCH_SPLIT(dev_priv)) + if (HAS_PCH_SPLIT(display)) return port == PORT_B; else return port == PORT_B || port == PORT_C; @@ -3384,7 +3377,6 @@ static bool assert_sdvo_port_valid(struct intel_display *display, enum port port bool intel_sdvo_init(struct intel_display *display, i915_reg_t sdvo_reg, enum port port) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_encoder *intel_encoder; struct intel_sdvo *intel_sdvo; int i; @@ -3427,7 +3419,7 @@ bool intel_sdvo_init(struct intel_display *display, } intel_encoder->compute_config = intel_sdvo_compute_config; - if (HAS_PCH_SPLIT(dev_priv)) { + if (HAS_PCH_SPLIT(display)) { intel_encoder->disable = pch_disable_sdvo; intel_encoder->post_disable = pch_post_disable_sdvo; } else { diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 1ad6c8a94b3d..fd92e6b89b43 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -36,9 +36,10 @@ #include <drm/drm_blend.h> #include <drm/drm_color_mgmt.h> #include <drm/drm_fourcc.h> +#include <drm/drm_print.h> #include <drm/drm_rect.h> -#include "i915_drv.h" +#include "i915_utils.h" #include "i9xx_plane.h" #include "intel_atomic_plane.h" #include "intel_de.h" diff --git a/drivers/gpu/drm/i915/display/intel_sprite_uapi.c b/drivers/gpu/drm/i915/display/intel_sprite_uapi.c index 1d0b84b464c1..4981cc34da05 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite_uapi.c +++ b/drivers/gpu/drm/i915/display/intel_sprite_uapi.c @@ -3,21 +3,21 @@ * Copyright © 2023 Intel Corporation */ -#include "i915_drv.h" #include "intel_crtc.h" +#include "intel_display_core.h" #include "intel_display_types.h" #include "intel_sprite_uapi.h" -static bool has_dst_key_in_primary_plane(struct drm_i915_private *dev_priv) +static bool has_dst_key_in_primary_plane(struct intel_display *display) { - return DISPLAY_VER(dev_priv) >= 9; + return DISPLAY_VER(display) >= 9; } static void intel_plane_set_ckey(struct intel_plane_state *plane_state, const struct drm_intel_sprite_colorkey *set) { + struct intel_display *display = to_intel_display(plane_state); struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); struct drm_intel_sprite_colorkey *key = &plane_state->ckey; *key = *set; @@ -34,7 +34,7 @@ static void intel_plane_set_ckey(struct intel_plane_state *plane_state, * On SKL+ we want dst key enabled on * the primary and not on the sprite. 
*/ - if (DISPLAY_VER(dev_priv) >= 9 && plane->id != PLANE_PRIMARY && + if (DISPLAY_VER(display) >= 9 && plane->id != PLANE_PRIMARY && set->flags & I915_SET_COLORKEY_DESTINATION) key->flags = 0; } @@ -43,7 +43,6 @@ int intel_sprite_set_colorkey_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct intel_display *display = to_intel_display(dev); - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_intel_sprite_colorkey *set = data; struct drm_plane *plane; struct drm_plane_state *plane_state; @@ -61,7 +60,7 @@ int intel_sprite_set_colorkey_ioctl(struct drm_device *dev, void *data, if ((set->flags & (I915_SET_COLORKEY_DESTINATION | I915_SET_COLORKEY_SOURCE)) == (I915_SET_COLORKEY_DESTINATION | I915_SET_COLORKEY_SOURCE)) return -EINVAL; - if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + if ((display->platform.valleyview || display->platform.cherryview) && set->flags & I915_SET_COLORKEY_DESTINATION) return -EINVAL; @@ -74,7 +73,7 @@ int intel_sprite_set_colorkey_ioctl(struct drm_device *dev, void *data, * Also multiple planes can't do destination keying on the same * pipe simultaneously. */ - if (DISPLAY_VER(dev_priv) >= 9 && + if (DISPLAY_VER(display) >= 9 && to_intel_plane(plane)->id >= PLANE_3 && set->flags & I915_SET_COLORKEY_DESTINATION) return -EINVAL; @@ -99,7 +98,7 @@ int intel_sprite_set_colorkey_ioctl(struct drm_device *dev, void *data, * On some platforms we have to configure * the dst colorkey on the primary plane. */ - if (!ret && has_dst_key_in_primary_plane(dev_priv)) { + if (!ret && has_dst_key_in_primary_plane(display)) { struct intel_crtc *crtc = intel_crtc_for_pipe(display, to_intel_plane(plane)->pipe); diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index 2e3f3f0207e8..acf0b3733908 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -33,15 +33,15 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> #include <drm/drm_edid.h> +#include <drm/drm_print.h> #include <drm/drm_probe_helper.h> -#include "i915_drv.h" #include "i915_reg.h" #include "intel_connector.h" #include "intel_crtc.h" #include "intel_de.h" -#include "intel_display_irq.h" #include "intel_display_driver.h" +#include "intel_display_irq.h" #include "intel_display_types.h" #include "intel_dpll.h" #include "intel_hotplug.h" @@ -1585,19 +1585,17 @@ intel_tv_detect_type(struct intel_tv *intel_tv, { struct intel_display *display = to_intel_display(connector->dev); struct intel_crtc *crtc = to_intel_crtc(connector->state->crtc); - struct drm_device *dev = connector->dev; - struct drm_i915_private *dev_priv = to_i915(dev); u32 tv_ctl, save_tv_ctl; u32 tv_dac, save_tv_dac; int type; /* Disable TV interrupts around load detect or we'll recurse */ if (connector->polled & DRM_CONNECTOR_POLL_HPD) { - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); i915_disable_pipestat(display, 0, PIPE_HOTPLUG_INTERRUPT_STATUS | PIPE_HOTPLUG_TV_INTERRUPT_STATUS); - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock_irq(&display->irq.lock); } save_tv_dac = tv_dac = intel_de_read(display, TV_DAC); @@ -1668,11 +1666,11 @@ intel_tv_detect_type(struct intel_tv *intel_tv, /* Restore interrupt config */ if (connector->polled & DRM_CONNECTOR_POLL_HPD) { - spin_lock_irq(&dev_priv->irq_lock); + spin_lock_irq(&display->irq.lock); i915_enable_pipestat(display, 0, PIPE_HOTPLUG_INTERRUPT_STATUS | PIPE_HOTPLUG_TV_INTERRUPT_STATUS); - spin_unlock_irq(&dev_priv->irq_lock); + 
spin_unlock_irq(&display->irq.lock); } return type; diff --git a/drivers/gpu/drm/i915/display/intel_vga.c b/drivers/gpu/drm/i915/display/intel_vga.c index 684b5d1bc87c..05d140c8032d 100644 --- a/drivers/gpu/drm/i915/display/intel_vga.c +++ b/drivers/gpu/drm/i915/display/intel_vga.c @@ -4,15 +4,20 @@ */ #include <linux/delay.h> +#include <linux/pci.h> #include <linux/vgaarb.h> +#include <drm/drm_device.h> +#include <drm/drm_print.h> #include <video/vga.h> + #include "soc/intel_gmch.h" -#include "i915_drv.h" #include "i915_reg.h" #include "intel_de.h" +#include "intel_display.h" #include "intel_vga.h" +#include "intel_vga_regs.h" static i915_reg_t intel_vga_cntrl_reg(struct intel_display *display) { @@ -24,16 +29,42 @@ static i915_reg_t intel_vga_cntrl_reg(struct intel_display *display) return VGACNTRL; } +static bool has_vga_pipe_sel(struct intel_display *display) +{ + if (display->platform.i845g || + display->platform.i865g) + return false; + + if (display->platform.valleyview || + display->platform.cherryview) + return true; + + return DISPLAY_VER(display) < 7; +} + /* Disable the VGA plane that we never use */ void intel_vga_disable(struct intel_display *display) { struct pci_dev *pdev = to_pci_dev(display->drm->dev); i915_reg_t vga_reg = intel_vga_cntrl_reg(display); + enum pipe pipe; + u32 tmp; u8 sr1; - if (intel_de_read(display, vga_reg) & VGA_DISP_DISABLE) + tmp = intel_de_read(display, vga_reg); + if (tmp & VGA_DISP_DISABLE) return; + if (display->platform.cherryview) + pipe = REG_FIELD_GET(VGA_PIPE_SEL_MASK_CHV, tmp); + else if (has_vga_pipe_sel(display)) + pipe = REG_FIELD_GET(VGA_PIPE_SEL_MASK, tmp); + else + pipe = PIPE_A; + + drm_dbg_kms(display->drm, "Disabling VGA plane on pipe %c\n", + pipe_name(pipe)); + /* WaEnableVGAAccessThroughIOPort:ctg,elk,ilk,snb,ivb,vlv,hsw */ vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); outb(0x01, VGA_SEQ_I); @@ -46,39 +77,6 @@ void intel_vga_disable(struct intel_display *display) intel_de_posting_read(display, vga_reg); } -void intel_vga_redisable_power_on(struct intel_display *display) -{ - i915_reg_t vga_reg = intel_vga_cntrl_reg(display); - - if (!(intel_de_read(display, vga_reg) & VGA_DISP_DISABLE)) { - drm_dbg_kms(display->drm, - "Something enabled VGA plane, disabling it\n"); - intel_vga_disable(display); - } -} - -void intel_vga_redisable(struct intel_display *display) -{ - intel_wakeref_t wakeref; - - /* - * This function can be called both from intel_modeset_setup_hw_state or - * at a very early point in our resume sequence, where the power well - * structures are not yet restored. Since this function is at a very - * paranoid "someone might have enabled VGA while we were not looking" - * level, just check if the power well is enabled instead of trying to - * follow the "don't touch the power well if we don't need it" policy - * the rest of the driver uses. 
- */ - wakeref = intel_display_power_get_if_enabled(display, POWER_DOMAIN_VGA); - if (!wakeref) - return; - - intel_vga_redisable_power_on(display); - - intel_display_power_put(display, POWER_DOMAIN_VGA, wakeref); -} - void intel_vga_reset_io_mem(struct intel_display *display) { struct pci_dev *pdev = to_pci_dev(display->drm->dev); diff --git a/drivers/gpu/drm/i915/display/intel_vga.h b/drivers/gpu/drm/i915/display/intel_vga.h index 824dfc32a199..16d699f3b641 100644 --- a/drivers/gpu/drm/i915/display/intel_vga.h +++ b/drivers/gpu/drm/i915/display/intel_vga.h @@ -10,8 +10,6 @@ struct intel_display; void intel_vga_reset_io_mem(struct intel_display *display); void intel_vga_disable(struct intel_display *display); -void intel_vga_redisable(struct intel_display *display); -void intel_vga_redisable_power_on(struct intel_display *display); int intel_vga_register(struct intel_display *display); void intel_vga_unregister(struct intel_display *display); diff --git a/drivers/gpu/drm/i915/display/intel_vga_regs.h b/drivers/gpu/drm/i915/display/intel_vga_regs.h new file mode 100644 index 000000000000..cbacced1a69f --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_vga_regs.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef __INTEL_VGA_REGS_H__ +#define __INTEL_VGA_REGS_H__ + +#include "intel_display_reg_defs.h" + +#define VGACNTRL _MMIO(0x71400) +#define VLV_VGACNTRL _MMIO(VLV_DISPLAY_BASE + 0x71400) +#define CPU_VGACNTRL _MMIO(0x41000) +#define VGA_DISP_DISABLE REG_BIT(31) +#define VGA_2X_MODE REG_BIT(30) /* pre-ilk */ +#define VGA_PIPE_SEL_MASK REG_BIT(29) /* pre-ivb */ +#define VGA_PIPE_SEL(pipe) REG_FIELD_PREP(VGA_PIPE_SEL_MASK, (pipe)) +#define VGA_PIPE_SEL_MASK_CHV REG_GENMASK(29, 28) /* chv */ +#define VGA_PIPE_SEL_CHV(pipe) REG_FIELD_PREP(VGA_PIPE_SEL_MASK_CHV, (pipe)) +#define VGA_BORDER_ENABLE REG_BIT(26) +#define VGA_PIPE_CSC_ENABLE REG_BIT(24) /* ilk+ */ +#define VGA_CENTERING_ENABLE_MASK REG_GENMASK(25, 24) /* pre-ilk */ +#define VGA_PALETTE_READ_SEL REG_BIT(23) /* pre-ivb */ +#define VGA_PALETTE_A_WRITE_DISABLE REG_BIT(22) /* pre-ivb */ +#define VGA_PALETTE_B_WRITE_DISABLE REG_BIT(21) /* pre-ivb */ +#define VGA_LEGACY_8BIT_PALETTE_ENABLE REG_BIT(20) +#define VGA_PALETTE_BYPASS REG_BIT(19) +#define VGA_NINE_DOT_DISABLE REG_BIT(18) +#define VGA_PALETTE_READ_SEL_HI_CHV REG_BIT(15) /* chv */ +#define VGA_PALETTE_C_WRITE_DISABLE_CHV REG_BIT(14) /* chv */ +#define VGA_ACTIVE_THROTTLING_MASK REG_GENMASK(15, 12) /* ilk+ */ +#define VGA_BLANK_THROTTLING_MASK REG_GENMASK(11, 8) /* ilk+ */ +#define VGA_BLINK_DUTY_CYCLE_MASK REG_GENMASK(7, 6) +#define VGA_VSYNC_BLINK_RATE_MASK REG_GENMASK(5, 0) + +#endif /* __INTEL_VGA_REGS_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 633a66f6b73b..c6565baf815a 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -444,7 +444,10 @@ static u32 trans_vrr_ctl(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); - if (DISPLAY_VER(display) >= 13) + if (DISPLAY_VER(display) >= 14) + return VRR_CTL_FLIP_LINE_EN | + XELPD_VRR_CTL_VRR_GUARDBAND(crtc_state->vrr.guardband); + else if (DISPLAY_VER(display) >= 13) return VRR_CTL_IGN_MAX_SHIFT | VRR_CTL_FLIP_LINE_EN | XELPD_VRR_CTL_VRR_GUARDBAND(crtc_state->vrr.guardband); else diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c index 
ee81220a7c88..c855426544cf 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.c +++ b/drivers/gpu/drm/i915/display/skl_scaler.c @@ -3,8 +3,10 @@ * Copyright © 2020 Intel Corporation */ -#include "i915_drv.h" +#include <drm/drm_print.h> + #include "i915_reg.h" +#include "i915_utils.h" #include "intel_de.h" #include "intel_display_trace.h" #include "intel_display_types.h" diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index f7c3ee1f8664..c7b336359a5e 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -2693,24 +2693,22 @@ static void skl_plane_enable_flip_done(struct intel_plane *plane) { struct intel_display *display = to_intel_display(plane); - struct drm_i915_private *i915 = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; - spin_lock_irq(&i915->irq_lock); + spin_lock_irq(&display->irq.lock); bdw_enable_pipe_irq(display, pipe, GEN9_PIPE_PLANE_FLIP_DONE(plane->id)); - spin_unlock_irq(&i915->irq_lock); + spin_unlock_irq(&display->irq.lock); } static void skl_plane_disable_flip_done(struct intel_plane *plane) { struct intel_display *display = to_intel_display(plane); - struct drm_i915_private *i915 = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; - spin_lock_irq(&i915->irq_lock); + spin_lock_irq(&display->irq.lock); bdw_disable_pipe_irq(display, pipe, GEN9_PIPE_PLANE_FLIP_DONE(plane->id)); - spin_unlock_irq(&i915->irq_lock); + spin_unlock_irq(&display->irq.lock); } static bool skl_plane_has_rc_ccs(struct intel_display *display, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index ddda468241ef..6e4d0ce3952f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index 7d97ea2a653e..c4854c5b4e0f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h index e6c382973129..9d7ee1579900 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index ab1af978911b..15835952352e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2011-2012 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index e5b0f66ea1fe..6e682a6a0574 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 67ac2586a0f3..0267c924634b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 9473050ac842..05e440643aa2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright 2012 Red Hat Inc */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 75a143d996e0..7a0cc51923b3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 7796c4119ef5..ca7e9216934a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2008,2010 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index ea7561ae6e13..232b984f60b6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h index 28d6526e32ab..8044d34707b6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index c3dabb857960..f6d37dff320d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.h b/drivers/gpu/drm/i915/gem/i915_gem_mman.h index 196417fd0f5c..946fb9825eb3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 356530b599ce..1f38e367c60b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright © 2017 Intel Corporation * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index a5f34542135c..c34f41605b46 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: 
MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 68413c05c812..64600aa8227f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 8780aa243105..7f83f8bdc8fb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index ef85c6dc9fd5..f9e7cab140f8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 900c08337942..f0857c5c96df 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.h b/drivers/gpu/drm/i915/gem/i915_gem_pm.h index bedf1e95941a..bd5bd2c5e7f9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index ae3343c81a64..3b81cc3ad5be 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index aec41f0f098f..b81e67504bbe 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2008-2015 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 9d958a6f377e..3380151edfc1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2008-2012 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c index af85d0c28168..8814cbcde5b5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index 
5ac23ff3feff..5a296ba3758a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2008 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 09b68713ab32..307a18eede72 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2012-2014 Intel Corporation * - * Based on amdgpu_mn, which bears the following notice: + * Based on amdgpu_mn, which bears the following notice: * * Copyright 2014 Advanced Micro Devices, Inc. * All Rights Reserved. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 1f55e62044a4..7127e90c1a8f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2016 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c index 46b9a17d6abc..65d84a93c525 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.c +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2017 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.h b/drivers/gpu/drm/i915/gem/i915_gemfs.h index 5d835e44c4f6..16d4333c9a4e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.h +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2017 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 804f74084bd4..9c3f17e51885 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -1837,6 +1837,8 @@ static int igt_mmap_revoke(void *arg) int i915_gem_mman_live_selftests(struct drm_i915_private *i915) { + int ret; + bool unuse_mm = false; static const struct i915_subtest tests[] = { SUBTEST(igt_partial_tiling), SUBTEST(igt_smoke_tiling), @@ -1848,5 +1850,15 @@ int i915_gem_mman_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_mmap_gpu), }; - return i915_live_subtests(tests, i915); + if (!current->mm) { + kthread_use_mm(current->active_mm); + unuse_mm = true; + } + + ret = i915_live_subtests(tests, i915); + + if (unuse_mm) + kthread_unuse_mm(current->active_mm); + + return ret; } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 30b128b1fde7..afbc5c769308 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -176,7 +176,6 @@ static void clear_vm_list(struct list_head *list) i915_vma_destroy_locked(vma); i915_gem_object_put(obj); } - } } diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 51847a846002..c481b56fa67d 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -751,7 +751,6 @@ static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine) static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) { - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) 
/* * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index cf41d325712e..5dd8121f4b15 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -314,7 +314,6 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = { }; static const struct drm_i915_mocs_entry dg1_mocs_table[] = { - /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), /* WB - L3 */ diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 6e9977b2d180..a876a34455f1 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -365,7 +365,13 @@ static void reset_prepare(struct intel_engine_cs *engine) ENGINE_READ_FW(engine, RING_HEAD), ENGINE_READ_FW(engine, RING_TAIL), ENGINE_READ_FW(engine, RING_START)); - if (!stop_ring(engine)) { + /* + * Sometimes engine head failed to set to zero even after writing into it. + * Use wait_for_atomic() with 20ms delay to let engine resumes from + * correct RING_HEAD. Experimented different values and determined + * that 20ms works best based on testing. + */ + if (wait_for_atomic((!stop_ring(engine) == 0), 20)) { drm_err(&engine->i915->drm, "failed to set %s head to zero " "ctl %08x head %08x tail %08x start %08x\n", diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 8731f275fdd9..5abc5fcc2514 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -8,7 +8,7 @@ #include <drm/intel/i915_drm.h> #include "display/intel_display.h" -#include "display/intel_display_irq.h" +#include "display/intel_display_rps.h" #include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" @@ -608,9 +608,7 @@ static bool gen5_rps_enable(struct intel_rps *rps) rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC); rps->ips.last_time2 = ktime_get_raw_ns(); - spin_lock(&i915->irq_lock); - ilk_enable_display_irq(display, DE_PCU_EVENT); - spin_unlock(&i915->irq_lock); + ilk_display_rps_enable(display); spin_unlock_irq(&mchdev_lock); @@ -628,9 +626,7 @@ static void gen5_rps_disable(struct intel_rps *rps) spin_lock_irq(&mchdev_lock); - spin_lock(&i915->irq_lock); - ilk_disable_display_irq(display, DE_PCU_EVENT); - spin_unlock(&i915->irq_lock); + ilk_display_rps_disable(display); rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); @@ -1003,6 +999,10 @@ void intel_rps_dec_waiters(struct intel_rps *rps) if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); + /* Don't decrement num_waiters for req where increment was skipped */ + if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING) + return; + intel_guc_slpc_dec_waiters(slpc); } else { atomic_dec(&rps->num_waiters); @@ -1031,11 +1031,15 @@ void intel_rps_boost(struct i915_request *rq) if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING) return; - if (slpc->min_freq_softlimit >= slpc->boost_freq) - return; - /* Return if old value is non zero */ if (!atomic_fetch_inc(&slpc->num_waiters)) { + /* + * Skip queuing boost work if frequency is already boosted, + * but still increment num_waiters. 
+ */ + if (slpc->min_freq_softlimit >= slpc->boost_freq) + return; + GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", rq->fence.context, rq->fence.seqno); queue_work(rps_to_gt(rps)->i915->unordered_wq, diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h index 5135b90a2a40..ece445109305 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps_types.h +++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h @@ -57,7 +57,7 @@ struct intel_rps { /* * work, interrupts_enabled and pm_iir are protected by - * i915->irq_lock + * gt->irq_lock */ struct timer_list timer; struct work_struct work; diff --git a/drivers/gpu/drm/i915/gt/intel_wopcm.h b/drivers/gpu/drm/i915/gt/intel_wopcm.h index 17d6aa86008a..d2038b6de5e7 100644 --- a/drivers/gpu/drm/i915/gt/intel_wopcm.h +++ b/drivers/gpu/drm/i915/gt/intel_wopcm.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2017-2018 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 116683ebe074..b37e400f74e5 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -156,7 +156,7 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */ struct i915_wa *list; - list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa), + list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*list), GFP_KERNEL); if (!list) { drm_err(&i915->drm, "No space for workaround init!\n"); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 22e750108c5f..23f04f6f8fba 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -7,6 +7,7 @@ #include "gem/i915_gem_internal.h" +#include "i915_drv.h" #include "i915_selftest.h" #include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" @@ -859,6 +860,14 @@ static int live_lrc_timestamp(void *arg) }; /* + * This test was designed to isolate a hardware bug. + * The bug was found and fixed in future generations but + * now the test pollutes our CI on previous generation. + */ + if (GRAPHICS_VER(gt->i915) == 12) + return 0; + + /* * We want to verify that the timestamp is saved and restore across * context switches and is monotonic. * diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index 908483ab0bc8..41716ed454b7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -33,15 +33,22 @@ int live_rc6_manual(void *arg) { struct intel_gt *gt = arg; struct intel_rc6 *rc6 = >->rc6; - u64 rc0_power, rc6_power; + struct intel_rps *rps = >->rps; intel_wakeref_t wakeref; + u64 rc0_sample_energy[2]; + u64 rc6_sample_energy[2]; + u64 sleep_time = 1000; + u32 rc0_freq = 0; + u32 rc6_freq = 0; + u64 rc0_power; + u64 rc6_power; bool has_power; + u64 threshold; ktime_t dt; u64 res[2]; int err = 0; - u32 rc0_freq = 0; - u32 rc6_freq = 0; - struct intel_rps *rps = >->rps; + u64 diff; + /* * Our claim is that we can "encourage" the GPU to enter rc6 at will. 
@@ -60,14 +67,15 @@ int live_rc6_manual(void *arg) /* Force RC6 off for starters */ __intel_rc6_disable(rc6); - msleep(1); /* wakeup is not immediate, takes about 100us on icl */ + /* wakeup is not immediate, takes about 100us on icl */ + usleep_range(1000, 2000); res[0] = rc6_residency(rc6); dt = ktime_get(); - rc0_power = librapl_energy_uJ(); - msleep(1000); - rc0_power = librapl_energy_uJ() - rc0_power; + rc0_sample_energy[0] = librapl_energy_uJ(); + msleep(sleep_time); + rc0_sample_energy[1] = librapl_energy_uJ() - rc0_sample_energy[0]; dt = ktime_sub(ktime_get(), dt); res[1] = rc6_residency(rc6); rc0_freq = intel_rps_read_actual_frequency_fw(rps); @@ -79,11 +87,12 @@ int live_rc6_manual(void *arg) } if (has_power) { - rc0_power = div64_u64(NSEC_PER_SEC * rc0_power, + rc0_power = div64_u64(NSEC_PER_SEC * rc0_sample_energy[1], ktime_to_ns(dt)); + if (!rc0_power) { if (rc0_freq) - pr_debug("No power measured while in RC0! GPU Freq: %u in RC0\n", + pr_debug("No power measured while in RC0! GPU Freq: %uMHz in RC0\n", rc0_freq); else pr_err("No power and freq measured while in RC0\n"); @@ -98,10 +107,10 @@ int live_rc6_manual(void *arg) res[0] = rc6_residency(rc6); intel_uncore_forcewake_flush(rc6_to_uncore(rc6), FORCEWAKE_ALL); dt = ktime_get(); - rc6_power = librapl_energy_uJ(); - msleep(1000); + rc6_sample_energy[0] = librapl_energy_uJ(); + msleep(sleep_time); rc6_freq = intel_rps_read_actual_frequency_fw(rps); - rc6_power = librapl_energy_uJ() - rc6_power; + rc6_sample_energy[1] = librapl_energy_uJ() - rc6_sample_energy[0]; dt = ktime_sub(ktime_get(), dt); res[1] = rc6_residency(rc6); if (res[1] == res[0]) { @@ -113,13 +122,24 @@ int live_rc6_manual(void *arg) } if (has_power) { - rc6_power = div64_u64(NSEC_PER_SEC * rc6_power, + rc6_power = div64_u64(NSEC_PER_SEC * rc6_sample_energy[1], ktime_to_ns(dt)); - pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n", + pr_info("GPU consumed %lluuW in RC0 and %lluuW in RC6\n", rc0_power, rc6_power); + if (2 * rc6_power > rc0_power) { - pr_err("GPU leaked energy while in RC6! 
GPU Freq: %u in RC6 and %u in RC0\n", - rc6_freq, rc0_freq); + pr_err("GPU leaked energy while in RC6!\n" + "GPU Freq: %uMHz in RC6 and %uMHz in RC0\n" + "RC0 energy before & after sleep respectively: %lluuJ %lluuJ\n" + "RC6 energy before & after sleep respectively: %lluuJ %lluuJ\n", + rc6_freq, rc0_freq, rc0_sample_energy[0], rc0_sample_energy[1], + rc6_sample_energy[0], rc6_sample_energy[1]); + + diff = res[1] - res[0]; + threshold = (9 * NSEC_PER_MSEC * sleep_time) / 10; + if (diff < threshold) + pr_err("Did not enter RC6 properly, RC6 start residency=%lluns, RC6 end residency=%lluns\n", + res[0], res[1]); err = -EINVAL; goto out_unlock; } diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c index 3941f2d6fa47..69ed946a39e5 100644 --- a/drivers/gpu/drm/i915/gt/selftest_tlb.c +++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c @@ -143,7 +143,7 @@ pte_tlbinv(struct intel_context *ce, if (ce->engine->class == OTHER_CLASS) msleep(200); else - msleep(10); + usleep_range(10000, 20000); if (va == vb) { if (!i915_request_completed(rq)) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c index 5dc0ccd07636..d550eb6edfb8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c @@ -230,7 +230,7 @@ int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, s gt_info(gt, "Invalid GSC firmware for MTL/ARL, got %d.%d.%d.%d but need 102.x.x.x", gsc->release.major, gsc->release.minor, gsc->release.patch, gsc->release.build); - return -EINVAL; + return -EINVAL; } if (min_ver.major) { diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index e6e9010462e3..1344e6d20a34 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -56,6 +56,7 @@ #include "display/intel_pps_regs.h" #include "display/intel_psr_regs.h" #include "display/intel_sprite_regs.h" +#include "display/intel_vga_regs.h" #include "display/skl_universal_plane_regs.h" #include "display/skl_watermark_regs.h" #include "display/vlv_dsi_pll_regs.h" diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index f5262b8ad237..273bc43468a0 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -62,7 +62,6 @@ #include "display/intel_pch_refclk.h" #include "display/intel_pps.h" #include "display/intel_sprite_uapi.h" -#include "display/intel_vga.h" #include "display/skl_watermark.h" #include "gem/i915_gem_context.h" @@ -235,7 +234,6 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) intel_uncore_mmio_debug_init_early(dev_priv); - spin_lock_init(&dev_priv->irq_lock); spin_lock_init(&dev_priv->gpu_error.lock); intel_sbi_init(dev_priv); @@ -263,9 +261,6 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) i915_gem_init_early(dev_priv); - /* This must be called before any calls to HAS_PCH_* */ - intel_detect_pch(dev_priv); - intel_irq_init(dev_priv); intel_display_driver_early_probe(display); intel_clock_gating_hooks_init(dev_priv); @@ -1205,8 +1200,6 @@ static int i915_drm_resume(struct drm_device *dev) i9xx_display_sr_restore(display); - intel_vga_redisable(display); - intel_gmbus_reset(display); intel_pps_unlock_regs_wa(display); diff --git a/drivers/gpu/drm/i915/i915_driver.h b/drivers/gpu/drm/i915/i915_driver.h index 4b67ad9a61cd..1e95ecb2a163 100644 --- a/drivers/gpu/drm/i915/i915_driver.h +++ b/drivers/gpu/drm/i915/i915_driver.h 
@@ -15,7 +15,6 @@ struct drm_printer; #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_TIMESTAMP 1695980603 extern const struct dev_pm_ops i915_pm_ops; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 236c48d282e4..d0e1980dcba2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -49,8 +49,6 @@ #include "gt/intel_workarounds.h" #include "gt/uc/intel_uc.h" -#include "soc/intel_pch.h" - #include "i915_drm_client.h" #include "i915_gem.h" #include "i915_gpu_error.h" @@ -224,8 +222,6 @@ struct drm_i915_private { }; unsigned int engine_uabi_class_count[I915_LAST_UABI_ENGINE_CLASS + 1]; - /* protects the irq masks */ - spinlock_t irq_lock; bool irqs_enabled; /* LPT/WPT IOSF sideband protection */ @@ -272,9 +268,6 @@ struct drm_i915_private { /* pm private clock gating functions */ const struct drm_i915_clock_gating_funcs *clock_gating_funcs; - /* PCH chipset type */ - enum intel_pch pch_type; - unsigned long gem_quirks; struct i915_gem_mm mm; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index a6613eed3398..4f785cdbd155 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -726,13 +726,6 @@ static void err_print_gt_info(struct drm_i915_error_state_buf *m, intel_sseu_print_topology(gt->_gt->i915, >->info.sseu, &p); } -static void err_print_gt_display(struct drm_i915_error_state_buf *m, - struct intel_gt_coredump *gt) -{ - err_printf(m, "IER: 0x%08x\n", gt->ier); - err_printf(m, "DERRMR: 0x%08x\n", gt->derrmr); -} - static void err_print_gt_global_nonguc(struct drm_i915_error_state_buf *m, struct intel_gt_coredump *gt) { @@ -878,7 +871,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, if (error->gt->uc && error->gt->uc->guc.is_guc_capture) print_guc_capture = true; - err_print_gt_display(m, error->gt); err_print_gt_global_nonguc(m, error->gt); err_print_gt_fences(m, error->gt); @@ -1767,27 +1759,6 @@ gt_record_uc(struct intel_gt_coredump *gt, return error_uc; } -/* Capture display registers. */ -static void gt_record_display_regs(struct intel_gt_coredump *gt) -{ - struct intel_uncore *uncore = gt->_gt->uncore; - struct drm_i915_private *i915 = uncore->i915; - - if (DISPLAY_VER(i915) >= 6 && DISPLAY_VER(i915) < 20) - gt->derrmr = intel_uncore_read(uncore, DERRMR); - - if (GRAPHICS_VER(i915) >= 8) - gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); - else if (IS_VALLEYVIEW(i915)) - gt->ier = intel_uncore_read(uncore, VLV_IER); - else if (HAS_PCH_SPLIT(i915)) - gt->ier = intel_uncore_read(uncore, DEIER); - else if (GRAPHICS_VER(i915) == 2) - gt->ier = intel_uncore_read16(uncore, GEN2_IER); - else - gt->ier = intel_uncore_read(uncore, GEN2_IER); -} - /* Capture all other registers that GuC doesn't capture. 
*/ static void gt_record_global_nonguc_regs(struct intel_gt_coredump *gt) { @@ -1821,9 +1792,12 @@ static void gt_record_global_nonguc_regs(struct intel_gt_coredump *gt) gt->gtier[i] = intel_uncore_read(uncore, GEN8_GT_IER(i)); gt->ngtier = 4; - } else if (HAS_PCH_SPLIT(i915)) { + } else if (GRAPHICS_VER(i915) >= 5) { gt->gtier[0] = intel_uncore_read(uncore, GTIER); gt->ngtier = 1; + } else { + gt->gtier[0] = intel_uncore_read(uncore, GEN2_IER); + gt->ngtier = 1; } gt->eir = intel_uncore_read(uncore, EIR); @@ -2043,7 +2017,6 @@ intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags) gc->_gt = gt; gc->awake = intel_gt_pm_is_awake(gt); - gt_record_display_regs(gc); gt_record_global_nonguc_regs(gc); /* @@ -2160,7 +2133,6 @@ i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump void i915_error_state_store(struct i915_gpu_coredump *error) { struct drm_i915_private *i915; - static bool warned; if (IS_ERR_OR_NULL(error)) return; @@ -2174,16 +2146,8 @@ void i915_error_state_store(struct i915_gpu_coredump *error) i915_gpu_coredump_get(error); - if (!xchg(&warned, true) && - ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) { - pr_info("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n"); - pr_info("Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/intel/issues/new.\n"); - pr_info("Please see https://drm.pages.freedesktop.org/intel-docs/how-to-file-i915-bugs.html for details.\n"); - pr_info("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n"); - pr_info("The GPU crash dump is required to analyze GPU hangs, so please always attach it.\n"); - pr_info("GPU crash dump saved to /sys/class/drm/card%d/error\n", - i915->drm.primary->index); - } + drm_info(&i915->drm, "GPU error state saved to /sys/class/drm/card%d/error\n", + i915->drm.primary->index); } /** diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 749e1c55613e..182324979278 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -146,7 +146,6 @@ struct intel_gt_coredump { /* Generic register state */ u32 eir; u32 pgtbl_er; - u32 ier; u32 gtier[6], ngtier; u32 forcewake; u32 error; /* gen6+ */ @@ -164,8 +163,6 @@ struct intel_gt_coredump { u32 clock_frequency; u32 clock_period_ns; - /* Display related */ - u32 derrmr; u32 sfc_done[I915_MAX_SFC]; /* gen12 */ u32 nfence; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c1f938a1da44..95042879bec4 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -658,23 +658,9 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) return IRQ_HANDLED; } -static void ibx_irq_reset(struct drm_i915_private *dev_priv) -{ - struct intel_uncore *uncore = &dev_priv->uncore; - - if (HAS_PCH_NOP(dev_priv)) - return; - - gen2_irq_reset(uncore, SDE_IRQ_REGS); - - if (HAS_PCH_CPT(dev_priv) || HAS_PCH_LPT(dev_priv)) - intel_uncore_write(&dev_priv->uncore, SERR_INT, 0xffffffff); -} - -/* drm_dma.h hooks -*/ static void ilk_irq_reset(struct drm_i915_private *dev_priv) { + struct intel_display *display = &dev_priv->display; struct intel_uncore *uncore = &dev_priv->uncore; gen2_irq_reset(uncore, DE_IRQ_REGS); @@ -690,7 +676,7 @@ static void ilk_irq_reset(struct drm_i915_private *dev_priv) gen5_gt_irq_reset(to_gt(dev_priv)); - ibx_irq_reset(dev_priv); + ibx_display_irq_reset(display); } static void 
valleyview_irq_reset(struct drm_i915_private *dev_priv) @@ -702,9 +688,7 @@ static void valleyview_irq_reset(struct drm_i915_private *dev_priv) gen5_gt_irq_reset(to_gt(dev_priv)); - spin_lock_irq(&dev_priv->irq_lock); vlv_display_irq_reset(display); - spin_unlock_irq(&dev_priv->irq_lock); } static void gen8_irq_reset(struct drm_i915_private *dev_priv) @@ -717,10 +701,6 @@ static void gen8_irq_reset(struct drm_i915_private *dev_priv) gen8_gt_irq_reset(to_gt(dev_priv)); gen8_display_irq_reset(display); gen2_irq_reset(uncore, GEN8_PCU_IRQ_REGS); - - if (HAS_PCH_SPLIT(dev_priv)) - ibx_irq_reset(dev_priv); - } static void gen11_irq_reset(struct drm_i915_private *dev_priv) @@ -770,9 +750,7 @@ static void cherryview_irq_reset(struct drm_i915_private *dev_priv) gen2_irq_reset(uncore, GEN8_PCU_IRQ_REGS); - spin_lock_irq(&dev_priv->irq_lock); vlv_display_irq_reset(display); - spin_unlock_irq(&dev_priv->irq_lock); } static void ilk_irq_postinstall(struct drm_i915_private *dev_priv) @@ -790,9 +768,7 @@ static void valleyview_irq_postinstall(struct drm_i915_private *dev_priv) gen5_gt_irq_postinstall(to_gt(dev_priv)); - spin_lock_irq(&dev_priv->irq_lock); vlv_display_irq_postinstall(display); - spin_unlock_irq(&dev_priv->irq_lock); intel_uncore_write(&dev_priv->uncore, VLV_MASTER_IER, MASTER_INTERRUPT_ENABLE); intel_uncore_posting_read(&dev_priv->uncore, VLV_MASTER_IER); @@ -849,9 +825,7 @@ static void cherryview_irq_postinstall(struct drm_i915_private *dev_priv) gen8_gt_irq_postinstall(to_gt(dev_priv)); - spin_lock_irq(&dev_priv->irq_lock); vlv_display_irq_postinstall(display); - spin_unlock_irq(&dev_priv->irq_lock); intel_uncore_write(&dev_priv->uncore, GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL); intel_uncore_posting_read(&dev_priv->uncore, GEN8_MASTER_IRQ); @@ -961,14 +935,7 @@ static void i915_irq_postinstall(struct drm_i915_private *dev_priv) gen2_irq_init(uncore, GEN2_IRQ_REGS, dev_priv->irq_mask, enable_mask); - /* Interrupt setup is already guaranteed to be single-threaded, this is - * just to make the assert_spin_locked check happy. */ - spin_lock_irq(&dev_priv->irq_lock); - i915_enable_pipestat(display, PIPE_A, PIPE_CRC_DONE_INTERRUPT_STATUS); - i915_enable_pipestat(display, PIPE_B, PIPE_CRC_DONE_INTERRUPT_STATUS); - spin_unlock_irq(&dev_priv->irq_lock); - - i915_enable_asle_pipestat(display); + i915_display_irq_postinstall(display); } static irqreturn_t i915_irq_handler(int irq, void *arg) @@ -1086,15 +1053,7 @@ static void i965_irq_postinstall(struct drm_i915_private *dev_priv) gen2_irq_init(uncore, GEN2_IRQ_REGS, dev_priv->irq_mask, enable_mask); - /* Interrupt setup is already guaranteed to be single-threaded, this is - * just to make the assert_spin_locked check happy. 
*/ - spin_lock_irq(&dev_priv->irq_lock); - i915_enable_pipestat(display, PIPE_A, PIPE_GMBUS_INTERRUPT_STATUS); - i915_enable_pipestat(display, PIPE_A, PIPE_CRC_DONE_INTERRUPT_STATUS); - i915_enable_pipestat(display, PIPE_B, PIPE_CRC_DONE_INTERRUPT_STATUS); - spin_unlock_irq(&dev_priv->irq_lock); - - i915_enable_asle_pipestat(display); + i965_display_irq_postinstall(display); } static irqreturn_t i965_irq_handler(int irq, void *arg) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 49beab8e324d..2e4190da3e0d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -866,6 +866,7 @@ #define FP_M2_DIV_MASK 0x0000003f #define FP_M2_PINEVIEW_DIV_MASK 0x000000ff #define FP_M2_DIV_SHIFT 0 + #define DPLL_TEST _MMIO(0x606c) #define DPLLB_TEST_SDVO_DIV_1 (0 << 22) #define DPLLB_TEST_SDVO_DIV_2 (1 << 22) @@ -877,11 +878,13 @@ #define DPLLA_TEST_N_BYPASS (1 << 3) #define DPLLA_TEST_M_BYPASS (1 << 2) #define DPLLA_INPUT_BUFFER_ENABLE (1 << 0) + #define D_STATE _MMIO(0x6104) #define DSTATE_GFX_RESET_I830 (1 << 6) #define DSTATE_PLL_D3_OFF (1 << 3) #define DSTATE_GFX_CLOCK_GATING (1 << 1) #define DSTATE_DOT_CLOCK_GATING (1 << 0) + #define DSPCLK_GATE_D(__i915) _MMIO(DISPLAY_MMIO_BASE(__i915) + 0x6200) # define DPUNIT_B_CLOCK_GATE_DISABLE (1 << 30) /* 965 */ # define VSUNIT_CLOCK_GATE_DISABLE (1 << 29) /* 965 */ @@ -1050,7 +1053,6 @@ /* * Overlay regs */ - #define OVADD _MMIO(0x30000) #define DOVSTA _MMIO(0x30008) #define OC_BUF (0x3 << 20) @@ -1106,7 +1108,6 @@ /* * Display engine regs */ - /* Pipe/transcoder A timing regs */ #define _TRANS_HTOTAL_A 0x60000 #define _TRANS_HTOTAL_B 0x61000 @@ -1397,88 +1398,50 @@ #define VLV_DP_B _MMIO(VLV_DISPLAY_BASE + 0x64100) #define VLV_DP_C _MMIO(VLV_DISPLAY_BASE + 0x64200) #define CHV_DP_D _MMIO(VLV_DISPLAY_BASE + 0x64300) -#define DP_PORT_EN (1 << 31) -#define DP_PIPE_SEL_SHIFT 30 -#define DP_PIPE_SEL_MASK (1 << 30) -#define DP_PIPE_SEL(pipe) ((pipe) << 30) -#define DP_PIPE_SEL_SHIFT_IVB 29 -#define DP_PIPE_SEL_MASK_IVB (3 << 29) -#define DP_PIPE_SEL_IVB(pipe) ((pipe) << 29) +#define DP_PORT_EN REG_BIT(31) +#define DP_PIPE_SEL_MASK REG_GENMASK(30, 30) +#define DP_PIPE_SEL(pipe) REG_FIELD_PREP(DP_PIPE_SEL_MASK, (pipe)) +#define DP_PIPE_SEL_MASK_IVB REG_GENMASK(30, 29) +#define DP_PIPE_SEL_IVB(pipe) REG_FIELD_PREP(DP_PIPE_SEL_MASK_IVB, (pipe)) #define DP_PIPE_SEL_SHIFT_CHV 16 -#define DP_PIPE_SEL_MASK_CHV (3 << 16) -#define DP_PIPE_SEL_CHV(pipe) ((pipe) << 16) - -/* Link training mode - select a suitable mode for each stage */ -#define DP_LINK_TRAIN_PAT_1 (0 << 28) -#define DP_LINK_TRAIN_PAT_2 (1 << 28) -#define DP_LINK_TRAIN_PAT_IDLE (2 << 28) -#define DP_LINK_TRAIN_OFF (3 << 28) -#define DP_LINK_TRAIN_MASK (3 << 28) -#define DP_LINK_TRAIN_SHIFT 28 - -/* CPT Link training mode */ -#define DP_LINK_TRAIN_PAT_1_CPT (0 << 8) -#define DP_LINK_TRAIN_PAT_2_CPT (1 << 8) -#define DP_LINK_TRAIN_PAT_IDLE_CPT (2 << 8) -#define DP_LINK_TRAIN_OFF_CPT (3 << 8) -#define DP_LINK_TRAIN_MASK_CPT (7 << 8) -#define DP_LINK_TRAIN_SHIFT_CPT 8 - -/* Signal voltages. 
These are mostly controlled by the other end */ -#define DP_VOLTAGE_0_4 (0 << 25) -#define DP_VOLTAGE_0_6 (1 << 25) -#define DP_VOLTAGE_0_8 (2 << 25) -#define DP_VOLTAGE_1_2 (3 << 25) -#define DP_VOLTAGE_MASK (7 << 25) -#define DP_VOLTAGE_SHIFT 25 - -/* Signal pre-emphasis levels, like voltages, the other end tells us what - * they want - */ -#define DP_PRE_EMPHASIS_0 (0 << 22) -#define DP_PRE_EMPHASIS_3_5 (1 << 22) -#define DP_PRE_EMPHASIS_6 (2 << 22) -#define DP_PRE_EMPHASIS_9_5 (3 << 22) -#define DP_PRE_EMPHASIS_MASK (7 << 22) -#define DP_PRE_EMPHASIS_SHIFT 22 - -/* How many wires to use. I guess 3 was too hard */ -#define DP_PORT_WIDTH(width) (((width) - 1) << 19) -#define DP_PORT_WIDTH_MASK (7 << 19) -#define DP_PORT_WIDTH_SHIFT 19 - -/* Mystic DPCD version 1.1 special mode */ -#define DP_ENHANCED_FRAMING (1 << 18) - -/* eDP */ -#define DP_PLL_FREQ_270MHZ (0 << 16) -#define DP_PLL_FREQ_162MHZ (1 << 16) -#define DP_PLL_FREQ_MASK (3 << 16) - -/* locked once port is enabled */ -#define DP_PORT_REVERSAL (1 << 15) - -/* eDP */ -#define DP_PLL_ENABLE (1 << 14) - -/* sends the clock on lane 15 of the PEG for debug */ -#define DP_CLOCK_OUTPUT_ENABLE (1 << 13) - -#define DP_SCRAMBLING_DISABLE (1 << 12) -#define DP_SCRAMBLING_DISABLE_IRONLAKE (1 << 7) - -/* limit RGB values to avoid confusing TVs */ -#define DP_COLOR_RANGE_16_235 (1 << 8) - -/* Turn on the audio link */ -#define DP_AUDIO_OUTPUT_ENABLE (1 << 6) - -/* vs and hs sync polarity */ -#define DP_SYNC_VS_HIGH (1 << 4) -#define DP_SYNC_HS_HIGH (1 << 3) - -/* A fantasy */ -#define DP_DETECTED (1 << 2) +#define DP_PIPE_SEL_MASK_CHV REG_GENMASK(17, 16) +#define DP_PIPE_SEL_CHV(pipe) REG_FIELD_PREP(DP_PIPE_SEL_MASK_CHV, (pipe)) +#define DP_LINK_TRAIN_MASK REG_GENMASK(29, 28) +#define DP_LINK_TRAIN_PAT_1 REG_FIELD_PREP(DP_LINK_TRAIN_MASK, 0) +#define DP_LINK_TRAIN_PAT_2 REG_FIELD_PREP(DP_LINK_TRAIN_MASK, 1) +#define DP_LINK_TRAIN_PAT_IDLE REG_FIELD_PREP(DP_LINK_TRAIN_MASK, 2) +#define DP_LINK_TRAIN_OFF REG_FIELD_PREP(DP_LINK_TRAIN_MASK, 3) +#define DP_LINK_TRAIN_MASK_CPT REG_GENMASK(10, 8) +#define DP_LINK_TRAIN_PAT_1_CPT REG_FIELD_PREP(DP_LINK_TRAIN_MASK_CPT, 0) +#define DP_LINK_TRAIN_PAT_2_CPT REG_FIELD_PREP(DP_LINK_TRAIN_MASK_CPT, 1) +#define DP_LINK_TRAIN_PAT_IDLE_CPT REG_FIELD_PREP(DP_LINK_TRAIN_MASK_CPT, 2) +#define DP_LINK_TRAIN_OFF_CPT REG_FIELD_PREP(DP_LINK_TRAIN_MASK_CPT, 3) +#define DP_VOLTAGE_MASK REG_GENMASK(27, 25) +#define DP_VOLTAGE_0_4 REG_FIELD_PREP(DP_VOLTAGE_MASK, 0) +#define DP_VOLTAGE_0_6 REG_FIELD_PREP(DP_VOLTAGE_MASK, 1) +#define DP_VOLTAGE_0_8 REG_FIELD_PREP(DP_VOLTAGE_MASK, 2) +#define DP_VOLTAGE_1_2 REG_FIELD_PREP(DP_VOLTAGE_MASK, 3) +#define DP_PRE_EMPHASIS_MASK REG_GENMASK(24, 22) +#define DP_PRE_EMPHASIS_0 REG_FIELD_PREP(DP_PRE_EMPHASIS_MASK, 0) +#define DP_PRE_EMPHASIS_3_5 REG_FIELD_PREP(DP_PRE_EMPHASIS_MASK, 1) +#define DP_PRE_EMPHASIS_6 REG_FIELD_PREP(DP_PRE_EMPHASIS_MASK, 2) +#define DP_PRE_EMPHASIS_9_5 REG_FIELD_PREP(DP_PRE_EMPHASIS_MASK, 3) +#define DP_PORT_WIDTH_MASK REG_GENMASK(21, 19) +#define DP_PORT_WIDTH(width) REG_FIELD_PREP(DP_PORT_WIDTH_MASK, (width) - 1) +#define DP_ENHANCED_FRAMING REG_BIT(18) +#define EDP_PLL_FREQ_MASK REG_GENMASK(17, 16) +#define EDP_PLL_FREQ_270MHZ REG_FIELD_PREP(EDP_PLL_FREQ_MASK, 0) +#define EDP_PLL_FREQ_162MHZ REG_FIELD_PREP(EDP_PLL_FREQ_MASK, 1) +#define DP_PORT_REVERSAL REG_BIT(15) +#define EDP_PLL_ENABLE REG_BIT(14) +#define DP_CLOCK_OUTPUT_ENABLE REG_BIT(13) +#define DP_SCRAMBLING_DISABLE REG_BIT(12) +#define DP_SCRAMBLING_DISABLE_ILK REG_BIT(7) +#define 
DP_COLOR_RANGE_16_235 REG_BIT(8) +#define DP_AUDIO_OUTPUT_ENABLE REG_BIT(6) +#define DP_SYNC_VS_HIGH REG_BIT(4) +#define DP_SYNC_HS_HIGH REG_BIT(3) +#define DP_DETECTED REG_BIT(2) /* * Computing GMCH M and N values for the Display Port link @@ -1812,18 +1775,6 @@ #define SWF3(dev_priv, i) _MMIO(DISPLAY_MMIO_BASE(dev_priv) + 0x72414 + (i) * 4) #define SWF_ILK(i) _MMIO(0x4F000 + (i) * 4) -/* VBIOS regs */ -#define VGACNTRL _MMIO(0x71400) -# define VGA_DISP_DISABLE (1 << 31) -# define VGA_2X_MODE (1 << 30) -# define VGA_PIPE_B_SELECT (1 << 29) - -#define VLV_VGACNTRL _MMIO(VLV_DISPLAY_BASE + 0x71400) - -/* Ironlake */ - -#define CPU_VGACNTRL _MMIO(0x41000) - #define DIGITAL_PORT_HOTPLUG_CNTRL _MMIO(0x44030) #define DIGITAL_PORTA_HOTPLUG_ENABLE (1 << 4) #define DIGITAL_PORTA_PULSE_DURATION_2ms (0 << 2) /* pre-HSW */ @@ -2784,7 +2735,6 @@ * functionality covered in PCH_PORT_HOTPLUG is split into * SHOTPLUG_CTL_DDI and SHOTPLUG_CTL_TC. */ - #define SHOTPLUG_CTL_DDI _MMIO(0xc4030) #define SHOTPLUG_CTL_DDI_HPD_ENABLE(hpd_pin) (0x8 << (_HPD_PIN_DDI(hpd_pin) * 4)) #define SHOTPLUG_CTL_DDI_HPD_OUTPUT_DATA(hpd_pin) (0x4 << (_HPD_PIN_DDI(hpd_pin) * 4)) @@ -2864,7 +2814,6 @@ #define TRANS_DPLL_ENABLE(pipe) (1 << ((pipe) * 4 + 3)) /* transcoder */ - #define _PCH_TRANS_HTOTAL_A 0xe0000 #define _PCH_TRANS_HTOTAL_B 0xe1000 #define PCH_TRANS_HTOTAL(pipe) _MMIO_PIPE(pipe, _PCH_TRANS_HTOTAL_A, _PCH_TRANS_HTOTAL_B) @@ -3795,7 +3744,6 @@ enum skl_power_gate { /* * SKL Clocks */ - /* CDCLK_CTL */ #define CDCLK_CTL _MMIO(0x46000) #define CDCLK_FREQ_SEL_MASK REG_GENMASK(27, 26) diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 609214231ffc..f7fb40cfdb70 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -40,8 +40,6 @@ struct drm_i915_private; struct timer_list; -#define FDO_BUG_URL "https://drm.pages.freedesktop.org/intel-docs/how-to-file-i915-bugs.html" - #define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \ __stringify(x), (long)(x)) diff --git a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c index 76d84cbb8361..d581a9d2c063 100644 --- a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c +++ b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c @@ -21,6 +21,7 @@ #include "display/intel_pfit_regs.h" #include "display/intel_psr_regs.h" #include "display/intel_sprite_regs.h" +#include "display/intel_vga_regs.h" #include "display/skl_universal_plane_regs.h" #include "display/skl_watermark_regs.h" #include "display/vlv_dsi_pll_regs.h" diff --git a/drivers/gpu/drm/i915/soc/intel_pch.c b/drivers/gpu/drm/i915/soc/intel_pch.c deleted file mode 100644 index 82dc7fbd1a3e..000000000000 --- a/drivers/gpu/drm/i915/soc/intel_pch.c +++ /dev/null @@ -1,316 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright 2019 Intel Corporation. 
- */ - -#include "i915_drv.h" -#include "i915_utils.h" -#include "intel_pch.h" - -#define INTEL_PCH_DEVICE_ID_MASK 0xff80 -#define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00 -#define INTEL_PCH_CPT_DEVICE_ID_TYPE 0x1c00 -#define INTEL_PCH_PPT_DEVICE_ID_TYPE 0x1e00 -#define INTEL_PCH_LPT_DEVICE_ID_TYPE 0x8c00 -#define INTEL_PCH_LPT_LP_DEVICE_ID_TYPE 0x9c00 -#define INTEL_PCH_WPT_DEVICE_ID_TYPE 0x8c80 -#define INTEL_PCH_WPT_LP_DEVICE_ID_TYPE 0x9c80 -#define INTEL_PCH_SPT_DEVICE_ID_TYPE 0xA100 -#define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE 0x9D00 -#define INTEL_PCH_KBP_DEVICE_ID_TYPE 0xA280 -#define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300 -#define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80 -#define INTEL_PCH_CMP_DEVICE_ID_TYPE 0x0280 -#define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680 -#define INTEL_PCH_CMP_V_DEVICE_ID_TYPE 0xA380 -#define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480 -#define INTEL_PCH_ICP2_DEVICE_ID_TYPE 0x3880 -#define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00 -#define INTEL_PCH_TGP_DEVICE_ID_TYPE 0xA080 -#define INTEL_PCH_TGP2_DEVICE_ID_TYPE 0x4380 -#define INTEL_PCH_JSP_DEVICE_ID_TYPE 0x4D80 -#define INTEL_PCH_ADP_DEVICE_ID_TYPE 0x7A80 -#define INTEL_PCH_ADP2_DEVICE_ID_TYPE 0x5180 -#define INTEL_PCH_ADP3_DEVICE_ID_TYPE 0x7A00 -#define INTEL_PCH_ADP4_DEVICE_ID_TYPE 0x5480 -#define INTEL_PCH_P2X_DEVICE_ID_TYPE 0x7100 -#define INTEL_PCH_P3X_DEVICE_ID_TYPE 0x7000 -#define INTEL_PCH_QEMU_DEVICE_ID_TYPE 0x2900 /* qemu q35 has 2918 */ - -/* Map PCH device id to PCH type, or PCH_NONE if unknown. */ -static enum intel_pch -intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) -{ - switch (id) { - case INTEL_PCH_IBX_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found Ibex Peak PCH\n"); - drm_WARN_ON(&dev_priv->drm, GRAPHICS_VER(dev_priv) != 5); - return PCH_IBX; - case INTEL_PCH_CPT_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found CougarPoint PCH\n"); - drm_WARN_ON(&dev_priv->drm, - GRAPHICS_VER(dev_priv) != 6 && !IS_IVYBRIDGE(dev_priv)); - return PCH_CPT; - case INTEL_PCH_PPT_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found PantherPoint PCH\n"); - drm_WARN_ON(&dev_priv->drm, - GRAPHICS_VER(dev_priv) != 6 && !IS_IVYBRIDGE(dev_priv)); - /* PPT is CPT compatible */ - return PCH_CPT; - case INTEL_PCH_LPT_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found LynxPoint PCH\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); - drm_WARN_ON(&dev_priv->drm, - IS_HASWELL_ULT(dev_priv) || IS_BROADWELL_ULT(dev_priv)); - return PCH_LPT_H; - case INTEL_PCH_LPT_LP_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found LynxPoint LP PCH\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); - drm_WARN_ON(&dev_priv->drm, - !IS_HASWELL_ULT(dev_priv) && !IS_BROADWELL_ULT(dev_priv)); - return PCH_LPT_LP; - case INTEL_PCH_WPT_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found WildcatPoint PCH\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); - drm_WARN_ON(&dev_priv->drm, - IS_HASWELL_ULT(dev_priv) || IS_BROADWELL_ULT(dev_priv)); - /* WPT is LPT compatible */ - return PCH_LPT_H; - case INTEL_PCH_WPT_LP_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found WildcatPoint LP PCH\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)); - drm_WARN_ON(&dev_priv->drm, - !IS_HASWELL_ULT(dev_priv) && !IS_BROADWELL_ULT(dev_priv)); - /* WPT is LPT compatible */ - return PCH_LPT_LP; - case INTEL_PCH_SPT_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found SunrisePoint PCH\n"); - 
drm_WARN_ON(&dev_priv->drm, - !IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)); - return PCH_SPT; - case INTEL_PCH_SPT_LP_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found SunrisePoint LP PCH\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_SKYLAKE(dev_priv) && - !IS_KABYLAKE(dev_priv) && - !IS_COFFEELAKE(dev_priv) && - !IS_COMETLAKE(dev_priv)); - return PCH_SPT; - case INTEL_PCH_KBP_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found Kaby Lake PCH (KBP)\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_SKYLAKE(dev_priv) && - !IS_KABYLAKE(dev_priv) && - !IS_COFFEELAKE(dev_priv) && - !IS_COMETLAKE(dev_priv)); - /* KBP is SPT compatible */ - return PCH_SPT; - case INTEL_PCH_CNP_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found Cannon Lake PCH (CNP)\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_COFFEELAKE(dev_priv) && - !IS_COMETLAKE(dev_priv)); - return PCH_CNP; - case INTEL_PCH_CNP_LP_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, - "Found Cannon Lake LP PCH (CNP-LP)\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_COFFEELAKE(dev_priv) && - !IS_COMETLAKE(dev_priv)); - return PCH_CNP; - case INTEL_PCH_CMP_DEVICE_ID_TYPE: - case INTEL_PCH_CMP2_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found Comet Lake PCH (CMP)\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_COFFEELAKE(dev_priv) && - !IS_COMETLAKE(dev_priv) && - !IS_ROCKETLAKE(dev_priv)); - /* CMP is CNP compatible */ - return PCH_CNP; - case INTEL_PCH_CMP_V_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found Comet Lake V PCH (CMP-V)\n"); - drm_WARN_ON(&dev_priv->drm, - !IS_COFFEELAKE(dev_priv) && - !IS_COMETLAKE(dev_priv)); - /* CMP-V is based on KBP, which is SPT compatible */ - return PCH_SPT; - case INTEL_PCH_ICP_DEVICE_ID_TYPE: - case INTEL_PCH_ICP2_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found Ice Lake PCH\n"); - drm_WARN_ON(&dev_priv->drm, !IS_ICELAKE(dev_priv)); - return PCH_ICP; - case INTEL_PCH_MCC_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found Mule Creek Canyon PCH\n"); - drm_WARN_ON(&dev_priv->drm, !(IS_JASPERLAKE(dev_priv) || - IS_ELKHARTLAKE(dev_priv))); - /* MCC is TGP compatible */ - return PCH_TGP; - case INTEL_PCH_TGP_DEVICE_ID_TYPE: - case INTEL_PCH_TGP2_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found Tiger Lake LP PCH\n"); - drm_WARN_ON(&dev_priv->drm, !IS_TIGERLAKE(dev_priv) && - !IS_ROCKETLAKE(dev_priv) && - !IS_SKYLAKE(dev_priv) && - !IS_KABYLAKE(dev_priv) && - !IS_COFFEELAKE(dev_priv) && - !IS_COMETLAKE(dev_priv)); - return PCH_TGP; - case INTEL_PCH_JSP_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found Jasper Lake PCH\n"); - drm_WARN_ON(&dev_priv->drm, !(IS_JASPERLAKE(dev_priv) || - IS_ELKHARTLAKE(dev_priv))); - /* JSP is ICP compatible */ - return PCH_ICP; - case INTEL_PCH_ADP_DEVICE_ID_TYPE: - case INTEL_PCH_ADP2_DEVICE_ID_TYPE: - case INTEL_PCH_ADP3_DEVICE_ID_TYPE: - case INTEL_PCH_ADP4_DEVICE_ID_TYPE: - drm_dbg_kms(&dev_priv->drm, "Found Alder Lake PCH\n"); - drm_WARN_ON(&dev_priv->drm, !IS_ALDERLAKE_S(dev_priv) && - !IS_ALDERLAKE_P(dev_priv)); - return PCH_ADP; - default: - return PCH_NONE; - } -} - -static bool intel_is_virt_pch(unsigned short id, - unsigned short svendor, unsigned short sdevice) -{ - return (id == INTEL_PCH_P2X_DEVICE_ID_TYPE || - id == INTEL_PCH_P3X_DEVICE_ID_TYPE || - (id == INTEL_PCH_QEMU_DEVICE_ID_TYPE && - svendor == PCI_SUBVENDOR_ID_REDHAT_QUMRANET && - sdevice == PCI_SUBDEVICE_ID_QEMU)); -} - -static void -intel_virt_detect_pch(const struct drm_i915_private *dev_priv, - unsigned short *pch_id, enum intel_pch *pch_type) -{ - unsigned short id = 0; - - /* - * In a virtualized passthrough 
environment we can be in a - * setup where the ISA bridge is not able to be passed through. - * In this case, a south bridge can be emulated and we have to - * make an educated guess as to which PCH is really there. - */ - - if (IS_ALDERLAKE_S(dev_priv) || IS_ALDERLAKE_P(dev_priv)) - id = INTEL_PCH_ADP_DEVICE_ID_TYPE; - else if (IS_TIGERLAKE(dev_priv) || IS_ROCKETLAKE(dev_priv)) - id = INTEL_PCH_TGP_DEVICE_ID_TYPE; - else if (IS_JASPERLAKE(dev_priv) || IS_ELKHARTLAKE(dev_priv)) - id = INTEL_PCH_MCC_DEVICE_ID_TYPE; - else if (IS_ICELAKE(dev_priv)) - id = INTEL_PCH_ICP_DEVICE_ID_TYPE; - else if (IS_COFFEELAKE(dev_priv) || - IS_COMETLAKE(dev_priv)) - id = INTEL_PCH_CNP_DEVICE_ID_TYPE; - else if (IS_KABYLAKE(dev_priv) || IS_SKYLAKE(dev_priv)) - id = INTEL_PCH_SPT_DEVICE_ID_TYPE; - else if (IS_HASWELL_ULT(dev_priv) || IS_BROADWELL_ULT(dev_priv)) - id = INTEL_PCH_LPT_LP_DEVICE_ID_TYPE; - else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - id = INTEL_PCH_LPT_DEVICE_ID_TYPE; - else if (GRAPHICS_VER(dev_priv) == 6 || IS_IVYBRIDGE(dev_priv)) - id = INTEL_PCH_CPT_DEVICE_ID_TYPE; - else if (GRAPHICS_VER(dev_priv) == 5) - id = INTEL_PCH_IBX_DEVICE_ID_TYPE; - - if (id) - drm_dbg_kms(&dev_priv->drm, "Assuming PCH ID %04x\n", id); - else - drm_dbg_kms(&dev_priv->drm, "Assuming no PCH\n"); - - *pch_type = intel_pch_type(dev_priv, id); - - /* Sanity check virtual PCH id */ - if (drm_WARN_ON(&dev_priv->drm, - id && *pch_type == PCH_NONE)) - id = 0; - - *pch_id = id; -} - -void intel_detect_pch(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pch = NULL; - unsigned short id; - enum intel_pch pch_type; - - /* - * South display engine on the same PCI device: just assign the fake - * PCH. - */ - if (DISPLAY_VER(dev_priv) >= 20) { - dev_priv->pch_type = PCH_LNL; - return; - } else if (IS_BATTLEMAGE(dev_priv) || IS_METEORLAKE(dev_priv)) { - /* - * Both north display and south display are on the SoC die. - * The real PCH (if it even exists) is uninvolved in display. - */ - dev_priv->pch_type = PCH_MTL; - return; - } else if (IS_DG2(dev_priv)) { - dev_priv->pch_type = PCH_DG2; - return; - } else if (IS_DG1(dev_priv)) { - dev_priv->pch_type = PCH_DG1; - return; - } - - /* - * The reason to probe ISA bridge instead of Dev31:Fun0 is to - * make graphics device passthrough work easy for VMM, that only - * need to expose ISA bridge to let driver know the real hardware - * underneath. This is a requirement from virtualization team. - * - * In some virtualized environments (e.g. XEN), there is irrelevant - * ISA bridge in the system. To work reliably, we should scan through - * all the ISA bridge devices and check for the first match, instead - * of only checking the first one. - */ - while ((pch = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, pch))) { - if (pch->vendor != PCI_VENDOR_ID_INTEL) - continue; - - id = pch->device & INTEL_PCH_DEVICE_ID_MASK; - - pch_type = intel_pch_type(dev_priv, id); - if (pch_type != PCH_NONE) { - dev_priv->pch_type = pch_type; - break; - } else if (intel_is_virt_pch(id, pch->subsystem_vendor, - pch->subsystem_device)) { - intel_virt_detect_pch(dev_priv, &id, &pch_type); - dev_priv->pch_type = pch_type; - break; - } - } - - /* - * Use PCH_NOP (PCH but no South Display) for PCH platforms without - * display. 
- */ - if (pch && !HAS_DISPLAY(dev_priv)) { - drm_dbg_kms(&dev_priv->drm, - "Display disabled, reverting to NOP PCH\n"); - dev_priv->pch_type = PCH_NOP; - } else if (!pch) { - if (i915_run_as_guest() && HAS_DISPLAY(dev_priv)) { - intel_virt_detect_pch(dev_priv, &id, &pch_type); - dev_priv->pch_type = pch_type; - } else { - drm_dbg_kms(&dev_priv->drm, "No PCH found.\n"); - } - } - - pci_dev_put(pch); -} diff --git a/drivers/gpu/drm/i915/soc/intel_pch.h b/drivers/gpu/drm/i915/soc/intel_pch.h deleted file mode 100644 index 635aea7a5539..000000000000 --- a/drivers/gpu/drm/i915/soc/intel_pch.h +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright 2019 Intel Corporation. - */ - -#ifndef __INTEL_PCH__ -#define __INTEL_PCH__ - -struct drm_i915_private; - -/* - * Sorted by south display engine compatibility. - * If the new PCH comes with a south display engine that is not - * inherited from the latest item, please do not add it to the - * end. Instead, add it right after its "parent" PCH. - */ -enum intel_pch { - PCH_NOP = -1, /* PCH without south display */ - PCH_NONE = 0, /* No PCH present */ - PCH_IBX, /* Ibexpeak PCH */ - PCH_CPT, /* Cougarpoint/Pantherpoint PCH */ - PCH_LPT_H, /* Lynxpoint/Wildcatpoint H PCH */ - PCH_LPT_LP, /* Lynxpoint/Wildcatpoint LP PCH */ - PCH_SPT, /* Sunrisepoint/Kaby Lake PCH */ - PCH_CNP, /* Cannon/Comet Lake PCH */ - PCH_ICP, /* Ice Lake/Jasper Lake PCH */ - PCH_TGP, /* Tiger Lake/Mule Creek Canyon PCH */ - PCH_ADP, /* Alder Lake PCH */ - - /* Fake PCHs, functionality handled on the same PCI dev */ - PCH_DG1 = 1024, - PCH_DG2, - PCH_MTL, - PCH_LNL, -}; - -#define INTEL_PCH_TYPE(dev_priv) ((dev_priv)->pch_type) -#define HAS_PCH_DG2(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_DG2) -#define HAS_PCH_ADP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_ADP) -#define HAS_PCH_DG1(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_DG1) -#define HAS_PCH_TGP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_TGP) -#define HAS_PCH_ICP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_ICP) -#define HAS_PCH_CNP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_CNP) -#define HAS_PCH_SPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_SPT) -#define HAS_PCH_LPT_H(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_LPT_H) -#define HAS_PCH_LPT_LP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_LPT_LP) -#define HAS_PCH_LPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_LPT_H || \ - INTEL_PCH_TYPE(dev_priv) == PCH_LPT_LP) -#define HAS_PCH_CPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_CPT) -#define HAS_PCH_IBX(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_IBX) -#define HAS_PCH_NOP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_NOP) -#define HAS_PCH_SPLIT(dev_priv) (INTEL_PCH_TYPE(dev_priv) != PCH_NONE) - -void intel_detect_pch(struct drm_i915_private *dev_priv); - -#endif /* __INTEL_PCH__ */ diff --git a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h index 2db40789235c..a7caac5b8ac8 100644 --- a/drivers/gpu/drm/radeon/atombios.h +++ b/drivers/gpu/drm/radeon/atombios.h @@ -3825,8 +3825,7 @@ typedef struct _ATOM_DPCD_INFO // note2: From RV770, the memory is more than 32bit addressable, so we will change // ucTableFormatRevision=1,ucTableContentRevision=4, the structure remains // exactly same as 1.1 and 1.2 (1.3 is never in use), but ulStartAddrUsedByFirmware -// (in offset to start of memory address) is KB aligned instead of byte aligend. 
-/***********************************************************************************/ +// (in offset to start of memory address) is KB aligned instead of byte aligned. // Note3: /* If we change usReserved to "usFBUsedbyDrvInKB", then to VBIOS this usFBUsedbyDrvInKB is a predefined, unchanged constant across VGA or non VGA adapter, for CAIL, The size of FB access area is known, only thing missing is the Offset of FB Access area, so we can have: diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index 661f374f5f27..9758f3a9df75 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -290,28 +290,6 @@ int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder) return result; } -/* - * write the audio workaround status to the hardware - */ -void r600_hdmi_audio_workaround(struct drm_encoder *encoder) -{ - struct drm_device *dev = encoder->dev; - struct radeon_device *rdev = dev->dev_private; - struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); - struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - uint32_t offset = dig->afmt->offset; - bool hdmi_audio_workaround = false; /* FIXME */ - u32 value; - - if (!hdmi_audio_workaround || - r600_hdmi_is_audio_buffer_filled(encoder)) - value = 0; /* disable workaround */ - else - value = HDMI0_AUDIO_TEST_EN; /* enable workaround */ - WREG32_P(HDMI0_AUDIO_PACKET_CONTROL + offset, - value, ~HDMI0_AUDIO_TEST_EN); -} - void r600_hdmi_audio_set_dto(struct radeon_device *rdev, struct radeon_crtc *crtc, unsigned int clock) { diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8605c074d9f7..63c47585afbc 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -394,9 +394,6 @@ long radeon_fence_wait_timeout(struct radeon_fence *fence, bool interruptible, l int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); int radeon_fence_wait_next(struct radeon_device *rdev, int ring); int radeon_fence_wait_empty(struct radeon_device *rdev, int ring); -int radeon_fence_wait_any(struct radeon_device *rdev, - struct radeon_fence **fences, - bool intr); struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence); void radeon_fence_unref(struct radeon_fence **fence); unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring); diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 8f5e07834fcc..9e697f10f9ca 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -401,7 +401,6 @@ void r600_audio_set_dto(struct drm_encoder *encoder, u32 clock); void r600_hdmi_update_avi_infoframe(struct drm_encoder *encoder, void *buffer, size_t size); void r600_hdmi_update_ACR(struct drm_encoder *encoder, uint32_t clock); -void r600_hdmi_audio_workaround(struct drm_encoder *encoder); int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder); void r600_hdmi_update_audio_settings(struct drm_encoder *encoder); u32 r600_get_xclk(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 64b26bfeafc9..b8e6202f1d5b 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -409,7 +409,6 @@ static int cmp_size_smaller_first(void *priv, const struct list_head *a, * radeon_cs_parser_fini() - clean parser states * @parser: parser structure holding parsing context. 
* @error: error number - * @backoff: indicator to backoff the reservation * * If error is set than unvalidate buffer, otherwise just free memory * used by parsing context. diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 8ff4f18b51a9..5b5b54e876d4 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -575,48 +575,6 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr) } /** - * radeon_fence_wait_any - wait for a fence to signal on any ring - * - * @rdev: radeon device pointer - * @fences: radeon fence object(s) - * @intr: use interruptable sleep - * - * Wait for any requested fence to signal (all asics). Fence - * array is indexed by ring id. @intr selects whether to use - * interruptable (true) or non-interruptable (false) sleep when - * waiting for the fences. Used by the suballocator. - * Returns 0 if any fence has passed, error for all other cases. - */ -int radeon_fence_wait_any(struct radeon_device *rdev, - struct radeon_fence **fences, - bool intr) -{ - uint64_t seq[RADEON_NUM_RINGS]; - unsigned int i, num_rings = 0; - long r; - - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - seq[i] = 0; - - if (!fences[i]) - continue; - - seq[i] = fences[i]->seq; - ++num_rings; - } - - /* nothing to wait for ? */ - if (num_rings == 0) - return -ENOENT; - - r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT); - if (r < 0) - return r; - - return 0; -} - -/** * radeon_fence_wait_next - wait for the next fence to signal * * @rdev: radeon device pointer diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 65a911ddd509..f9267b026f8d 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -1109,7 +1109,7 @@ #define MIN_POWER_SHIFT 0 #define MAX_POWER(x) ((x) << 16) #define MAX_POWER_MASK (0x3fff << 16) -#define MAX_POWER_SHIFT 0 +#define MAX_POWER_SHIFT 16 #define SQ_POWER_THROTTLE2 0x8e5c #define MAX_POWER_DELTA(x) ((x) << 0) #define MAX_POWER_DELTA_MASK (0x3fff << 0) diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index c5d6681645ed..e4bf484d4121 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -187,7 +187,6 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ display/xe_display.o \ display/xe_display_misc.o \ display/xe_display_rpm.o \ - display/xe_display_rps.o \ display/xe_display_wa.o \ display/xe_dsb_buffer.o \ display/xe_fb_pin.o \ @@ -198,7 +197,6 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ # SOC code shared with i915 xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-soc/intel_dram.o \ - i915-soc/intel_pch.o \ i915-soc/intel_rom.o # Display code shared with i915 @@ -273,6 +271,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_panel.o \ i915-display/intel_pfit.o \ i915-display/intel_pmdemand.o \ + i915-display/intel_pch.o \ i915-display/intel_pps.o \ i915-display/intel_psr.o \ i915-display/intel_qp_tables.o \ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h b/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h deleted file mode 100644 index 21fec9cc837c..000000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/gt/intel_rps.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef __INTEL_RPS_H__ -#define __INTEL_RPS_H__ - -#define gen5_rps_irq_handler(x) ({}) - -#endif /* __INTEL_RPS_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 
f89bd5e3520d..9b7572e06f34 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -21,28 +21,12 @@ static inline struct drm_i915_private *to_i915(const struct drm_device *dev) return container_of(dev, struct drm_i915_private, drm); } +/* compat platform checks only for soc/ usage */ #define IS_PLATFORM(xe, x) ((xe)->info.platform == x) -#define INTEL_INFO(dev_priv) (&((dev_priv)->info)) -#define IS_I830(dev_priv) (dev_priv && 0) -#define IS_I845G(dev_priv) (dev_priv && 0) -#define IS_I85X(dev_priv) (dev_priv && 0) -#define IS_I865G(dev_priv) (dev_priv && 0) #define IS_I915G(dev_priv) (dev_priv && 0) #define IS_I915GM(dev_priv) (dev_priv && 0) -#define IS_I945G(dev_priv) (dev_priv && 0) -#define IS_I945GM(dev_priv) (dev_priv && 0) -#define IS_I965G(dev_priv) (dev_priv && 0) -#define IS_I965GM(dev_priv) (dev_priv && 0) -#define IS_G45(dev_priv) (dev_priv && 0) -#define IS_GM45(dev_priv) (dev_priv && 0) -#define IS_G4X(dev_priv) (dev_priv && 0) #define IS_PINEVIEW(dev_priv) (dev_priv && 0) -#define IS_G33(dev_priv) (dev_priv && 0) -#define IS_IRONLAKE(dev_priv) (dev_priv && 0) -#define IS_IRONLAKE_M(dev_priv) (dev_priv && 0) -#define IS_SANDYBRIDGE(dev_priv) (dev_priv && 0) #define IS_IVYBRIDGE(dev_priv) (dev_priv && 0) -#define IS_IVB_GT1(dev_priv) (dev_priv && 0) #define IS_VALLEYVIEW(dev_priv) (dev_priv && 0) #define IS_CHERRYVIEW(dev_priv) (dev_priv && 0) #define IS_HASWELL(dev_priv) (dev_priv && 0) @@ -70,39 +54,10 @@ static inline struct drm_i915_private *to_i915(const struct drm_device *dev) #define IS_HASWELL_ULT(dev_priv) (dev_priv && 0) #define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0) -#define IS_BROADWELL_ULX(dev_priv) (dev_priv && 0) #define IS_MOBILE(xe) (xe && 0) -#define IS_TIGERLAKE_UY(xe) (xe && 0) -#define IS_COMETLAKE_ULX(xe) (xe && 0) -#define IS_COFFEELAKE_ULX(xe) (xe && 0) -#define IS_KABYLAKE_ULX(xe) (xe && 0) -#define IS_SKYLAKE_ULX(xe) (xe && 0) -#define IS_HASWELL_ULX(xe) (xe && 0) -#define IS_COMETLAKE_ULT(xe) (xe && 0) -#define IS_COFFEELAKE_ULT(xe) (xe && 0) -#define IS_KABYLAKE_ULT(xe) (xe && 0) -#define IS_SKYLAKE_ULT(xe) (xe && 0) - -#define IS_DG2_G10(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G10) -#define IS_DG2_G11(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G11) -#define IS_DG2_G12(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G12) -#define IS_RAPTORLAKE_U(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_P_RPLU) -#define IS_ICL_WITH_PORT_F(xe) (xe && 0) #define HAS_FLAT_CCS(xe) (xe_device_has_flat_ccs(xe)) - #define HAS_128_BYTE_Y_TILING(xe) (xe || 1) -#ifdef CONFIG_ARM64 -/* - * arm64 indirectly includes linux/rtc.h, - * which defines a irq_lock, so include it - * here before #define-ing it - */ -#include <linux/rtc.h> -#endif - -#define irq_lock irq.lock - #endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h b/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h deleted file mode 100644 index 9c46556d33a4..000000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/soc/intel_pch.h +++ /dev/null @@ -1,6 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#include "../../../i915/soc/intel_pch.h" diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index 267f31697343..e8191562d122 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -79,11 +79,11 @@ err: return ERR_CAST(fb); } -int 
intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info, - struct drm_gem_object *_obj, struct i915_vma *vma) +int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info, + struct drm_gem_object *_obj, struct i915_vma *vma) { struct xe_bo *obj = gem_to_xe_bo(_obj); - struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + struct pci_dev *pdev = to_pci_dev(display->drm->dev); if (!(obj->flags & XE_BO_FLAG_SYSTEM)) { if (obj->flags & XE_BO_FLAG_STOLEN) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 20c3bcd953b7..68f064f33d4b 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -133,9 +133,6 @@ int xe_display_init_early(struct xe_device *xe) /* Fake uncore lock */ spin_lock_init(&xe->uncore.lock); - /* This must be called before any calls to HAS_PCH_* */ - intel_detect_pch(xe); - intel_display_driver_early_probe(display); /* Early display init.. */ diff --git a/drivers/gpu/drm/xe/display/xe_display_rps.c b/drivers/gpu/drm/xe/display/xe_display_rps.c deleted file mode 100644 index fa616f9688a5..000000000000 --- a/drivers/gpu/drm/xe/display/xe_display_rps.c +++ /dev/null @@ -1,17 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2023 Intel Corporation - */ - -#include "intel_display_rps.h" - -void intel_display_rps_boost_after_vblank(struct drm_crtc *crtc, - struct dma_fence *fence) -{ -} - -void intel_display_rps_mark_interactive(struct intel_display *display, - struct intel_atomic_state *state, - bool interactive) -{ -} diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 7c02323e9531..b35a6f201d4a 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -9,7 +9,6 @@ #include "abi/gsc_command_header_abi.h" #include "intel_hdcp_gsc.h" -#include "intel_hdcp_gsc_message.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_device_types.h" @@ -22,7 +21,8 @@ #define HECI_MEADDRESS_HDCP 18 -struct intel_hdcp_gsc_message { +struct intel_hdcp_gsc_context { + struct xe_device *xe; struct xe_bo *hdcp_bo; u64 hdcp_cmd_in; u64 hdcp_cmd_out; @@ -30,14 +30,9 @@ struct intel_hdcp_gsc_message { #define HDCP_GSC_HEADER_SIZE sizeof(struct intel_gsc_mtl_header) -bool intel_hdcp_gsc_cs_required(struct intel_display *display) +bool intel_hdcp_gsc_check_status(struct drm_device *drm) { - return DISPLAY_VER(display) >= 14; -} - -bool intel_hdcp_gsc_check_status(struct intel_display *display) -{ - struct xe_device *xe = to_xe_device(display->drm); + struct xe_device *xe = to_xe_device(drm); struct xe_tile *tile = xe_device_get_root_tile(xe); struct xe_gt *gt = tile->media_gt; struct xe_gsc *gsc = >->uc.gsc; @@ -69,10 +64,9 @@ out: } /*This function helps allocate memory for the command that we will send to gsc cs */ -static int intel_hdcp_gsc_initialize_message(struct intel_display *display, - struct intel_hdcp_gsc_message *hdcp_message) +static int intel_hdcp_gsc_initialize_message(struct xe_device *xe, + struct intel_hdcp_gsc_context *gsc_context) { - struct xe_device *xe = to_xe_device(display->drm); struct xe_bo *bo = NULL; u64 cmd_in, cmd_out; int ret = 0; @@ -84,7 +78,7 @@ static int intel_hdcp_gsc_initialize_message(struct intel_display *display, XE_BO_FLAG_GGTT); if (IS_ERR(bo)) { - drm_err(display->drm, "Failed to allocate bo for HDCP streaming command!\n"); + drm_err(&xe->drm, "Failed to allocate bo for HDCP streaming command!\n"); ret = PTR_ERR(bo); goto 
out; } @@ -93,104 +87,60 @@ static int intel_hdcp_gsc_initialize_message(struct intel_display *display, cmd_out = cmd_in + PAGE_SIZE; xe_map_memset(xe, &bo->vmap, 0, 0, bo->size); - hdcp_message->hdcp_bo = bo; - hdcp_message->hdcp_cmd_in = cmd_in; - hdcp_message->hdcp_cmd_out = cmd_out; + gsc_context->hdcp_bo = bo; + gsc_context->hdcp_cmd_in = cmd_in; + gsc_context->hdcp_cmd_out = cmd_out; + gsc_context->xe = xe; + out: return ret; } -static int intel_hdcp_gsc_hdcp2_init(struct intel_display *display) +struct intel_hdcp_gsc_context *intel_hdcp_gsc_context_alloc(struct drm_device *drm) { - struct intel_hdcp_gsc_message *hdcp_message; + struct xe_device *xe = to_xe_device(drm); + struct intel_hdcp_gsc_context *gsc_context; int ret; - hdcp_message = kzalloc(sizeof(*hdcp_message), GFP_KERNEL); - - if (!hdcp_message) - return -ENOMEM; + gsc_context = kzalloc(sizeof(*gsc_context), GFP_KERNEL); + if (!gsc_context) + return ERR_PTR(-ENOMEM); /* * NOTE: No need to lock the comp mutex here as it is already * going to be taken before this function called */ - ret = intel_hdcp_gsc_initialize_message(display, hdcp_message); + ret = intel_hdcp_gsc_initialize_message(xe, gsc_context); if (ret) { - drm_err(display->drm, "Could not initialize hdcp_message\n"); - kfree(hdcp_message); - return ret; + drm_err(&xe->drm, "Could not initialize gsc_context\n"); + kfree(gsc_context); + gsc_context = ERR_PTR(ret); } - display->hdcp.hdcp_message = hdcp_message; - return ret; -} - -static const struct i915_hdcp_ops gsc_hdcp_ops = { - .initiate_hdcp2_session = intel_hdcp_gsc_initiate_session, - .verify_receiver_cert_prepare_km = - intel_hdcp_gsc_verify_receiver_cert_prepare_km, - .verify_hprime = intel_hdcp_gsc_verify_hprime, - .store_pairing_info = intel_hdcp_gsc_store_pairing_info, - .initiate_locality_check = intel_hdcp_gsc_initiate_locality_check, - .verify_lprime = intel_hdcp_gsc_verify_lprime, - .get_session_key = intel_hdcp_gsc_get_session_key, - .repeater_check_flow_prepare_ack = - intel_hdcp_gsc_repeater_check_flow_prepare_ack, - .verify_mprime = intel_hdcp_gsc_verify_mprime, - .enable_hdcp_authentication = intel_hdcp_gsc_enable_authentication, - .close_hdcp_session = intel_hdcp_gsc_close_session, -}; - -int intel_hdcp_gsc_init(struct intel_display *display) -{ - struct i915_hdcp_arbiter *data; - int ret; - - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return -ENOMEM; - - mutex_lock(&display->hdcp.hdcp_mutex); - display->hdcp.arbiter = data; - display->hdcp.arbiter->hdcp_dev = display->drm->dev; - display->hdcp.arbiter->ops = &gsc_hdcp_ops; - ret = intel_hdcp_gsc_hdcp2_init(display); - if (ret) - kfree(data); - - mutex_unlock(&display->hdcp.hdcp_mutex); - - return ret; + return gsc_context; } -void intel_hdcp_gsc_fini(struct intel_display *display) +void intel_hdcp_gsc_context_free(struct intel_hdcp_gsc_context *gsc_context) { - struct intel_hdcp_gsc_message *hdcp_message = - display->hdcp.hdcp_message; - struct i915_hdcp_arbiter *arb = display->hdcp.arbiter; - - if (hdcp_message) { - xe_bo_unpin_map_no_vm(hdcp_message->hdcp_bo); - kfree(hdcp_message); - display->hdcp.hdcp_message = NULL; - } + if (!gsc_context) + return; - kfree(arb); - display->hdcp.arbiter = NULL; + xe_bo_unpin_map_no_vm(gsc_context->hdcp_bo); + kfree(gsc_context); } static int xe_gsc_send_sync(struct xe_device *xe, - struct intel_hdcp_gsc_message *hdcp_message, + struct intel_hdcp_gsc_context *gsc_context, u32 msg_size_in, u32 msg_size_out, u32 addr_out_off) { - struct xe_gt *gt = hdcp_message->hdcp_bo->tile->media_gt; - 
struct iosys_map *map = &hdcp_message->hdcp_bo->vmap; + struct xe_gt *gt = gsc_context->hdcp_bo->tile->media_gt; + struct iosys_map *map = &gsc_context->hdcp_bo->vmap; struct xe_gsc *gsc = >->uc.gsc; int ret; - ret = xe_gsc_pkt_submit_kernel(gsc, hdcp_message->hdcp_cmd_in, msg_size_in, - hdcp_message->hdcp_cmd_out, msg_size_out); + ret = xe_gsc_pkt_submit_kernel(gsc, gsc_context->hdcp_cmd_in, msg_size_in, + gsc_context->hdcp_cmd_out, msg_size_out); if (ret) { drm_err(&xe->drm, "failed to send gsc HDCP msg (%d)\n", ret); return ret; @@ -205,12 +155,12 @@ static int xe_gsc_send_sync(struct xe_device *xe, return ret; } -ssize_t intel_hdcp_gsc_msg_send(struct xe_device *xe, u8 *msg_in, - size_t msg_in_len, u8 *msg_out, - size_t msg_out_len) +ssize_t intel_hdcp_gsc_msg_send(struct intel_hdcp_gsc_context *gsc_context, + void *msg_in, size_t msg_in_len, + void *msg_out, size_t msg_out_len) { + struct xe_device *xe = gsc_context->xe; const size_t max_msg_size = PAGE_SIZE - HDCP_GSC_HEADER_SIZE; - struct intel_hdcp_gsc_message *hdcp_message; u64 host_session_id; u32 msg_size_in, msg_size_out; u32 addr_out_off, addr_in_wr_off = 0; @@ -223,15 +173,14 @@ ssize_t intel_hdcp_gsc_msg_send(struct xe_device *xe, u8 *msg_in, msg_size_in = msg_in_len + HDCP_GSC_HEADER_SIZE; msg_size_out = msg_out_len + HDCP_GSC_HEADER_SIZE; - hdcp_message = xe->display.hdcp.hdcp_message; addr_out_off = PAGE_SIZE; host_session_id = xe_gsc_create_host_session_id(); xe_pm_runtime_get_noresume(xe); - addr_in_wr_off = xe_gsc_emit_header(xe, &hdcp_message->hdcp_bo->vmap, + addr_in_wr_off = xe_gsc_emit_header(xe, &gsc_context->hdcp_bo->vmap, addr_in_wr_off, HECI_MEADDRESS_HDCP, host_session_id, msg_in_len); - xe_map_memcpy_to(xe, &hdcp_message->hdcp_bo->vmap, addr_in_wr_off, + xe_map_memcpy_to(xe, &gsc_context->hdcp_bo->vmap, addr_in_wr_off, msg_in, msg_in_len); /* * Keep sending request in case the pending bit is set no need to add @@ -240,7 +189,7 @@ ssize_t intel_hdcp_gsc_msg_send(struct xe_device *xe, u8 *msg_in, * 20 times each message 50 ms apart */ do { - ret = xe_gsc_send_sync(xe, hdcp_message, msg_size_in, msg_size_out, + ret = xe_gsc_send_sync(xe, gsc_context, msg_size_in, msg_size_out, addr_out_off); /* Only try again if gsc says so */ @@ -254,7 +203,7 @@ ssize_t intel_hdcp_gsc_msg_send(struct xe_device *xe, u8 *msg_in, if (ret) goto out; - xe_map_memcpy_from(xe, msg_out, &hdcp_message->hdcp_bo->vmap, + xe_map_memcpy_from(xe, msg_out, &gsc_context->hdcp_bo->vmap, addr_out_off + HDCP_GSC_HEADER_SIZE, msg_out_len); diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index ef1e5256c56a..0e502feaca81 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -46,8 +46,11 @@ static void read_l3cc_table(struct xe_gt *gt, unsigned int fw_ref, i; u32 reg_val; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n"); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + xe_force_wake_put(gt_to_fw(gt), fw_ref); + KUNIT_ASSERT_TRUE_MSG(test, true, "Forcewake Failed.\n"); + } for (i = 0; i < info->num_mocs_regs; i++) { if (!(i & 1)) { diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 3a84a9d92c48..d99d91fe8aa9 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -2569,7 +2569,7 @@ static int gem_create_user_ext_set_property(struct xe_device *xe, int err; u32 idx; - err = 
__copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; @@ -2606,7 +2606,7 @@ static int gem_create_user_extensions(struct xe_device *xe, struct xe_bo *bo, if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) return -E2BIG; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index a9e618abf8ac..7a8af2311318 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -177,6 +177,8 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, struct xe_devcoredump *coredump = data; struct xe_devcoredump_snapshot *ss; ssize_t byte_copied; + u32 chunk_offset; + ssize_t new_chunk_position; if (!coredump) return -ENODEV; @@ -201,10 +203,14 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, return 0; } + new_chunk_position = div_u64_rem(offset, + XE_DEVCOREDUMP_CHUNK_MAX, + &chunk_offset); + if (offset >= ss->read.chunk_position + XE_DEVCOREDUMP_CHUNK_MAX || offset < ss->read.chunk_position) { - ss->read.chunk_position = - ALIGN_DOWN(offset, XE_DEVCOREDUMP_CHUNK_MAX); + ss->read.chunk_position = new_chunk_position * + XE_DEVCOREDUMP_CHUNK_MAX; __xe_devcoredump_read(ss->read.buffer, XE_DEVCOREDUMP_CHUNK_MAX, @@ -213,8 +219,7 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, byte_copied = count < ss->read.size - offset ? count : ss->read.size - offset; - memcpy(buffer, ss->read.buffer + - (offset % XE_DEVCOREDUMP_CHUNK_MAX), byte_copied); + memcpy(buffer, ss->read.buffer + chunk_offset, byte_copied); mutex_unlock(&coredump->lock); diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 75e753e0a682..c02c4c4e9412 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -26,6 +26,7 @@ #include "xe_bo_evict.h" #include "xe_debugfs.h" #include "xe_devcoredump.h" +#include "xe_device_sysfs.h" #include "xe_dma_buf.h" #include "xe_drm_client.h" #include "xe_drv.h" @@ -915,6 +916,10 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err_unregister_display; + err = xe_device_sysfs_init(xe); + if (err) + goto err_unregister_display; + xe_debugfs_register(xe); err = xe_hwmon_register(xe); diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index 7efbd4c52791..9628e9a0a0af 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -3,14 +3,16 @@ * Copyright © 2023 Intel Corporation */ +#include <linux/device.h> #include <linux/kobject.h> #include <linux/pci.h> #include <linux/sysfs.h> -#include <drm/drm_managed.h> - #include "xe_device.h" #include "xe_device_sysfs.h" +#include "xe_mmio.h" +#include "xe_pcode_api.h" +#include "xe_pcode.h" #include "xe_pm.h" /** @@ -63,11 +65,93 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR_RW(vram_d3cold_threshold); +/** + * DOC: PCIe Gen5 Limitations + * + * Default link speed of discrete GPUs is determined by configuration parameters + * stored in their flash memory, which are subject to override through user + * initiated firmware updates. 
It has been observed that devices configured with + PCIe Gen5 as their default link speed can encounter link quality issues due + to host or motherboard limitations and may have to auto-downgrade their link + to PCIe Gen4 speed when the link is unstable at Gen5, which makes + firmware updates rather risky on such setups. It is therefore required to ensure that + the device is capable of auto-downgrading its link to PCIe Gen4 speed before + pushing a firmware image with PCIe Gen5 as the default configuration. This can + be done by reading the ``auto_link_downgrade_capable`` sysfs entry, which will + indicate whether the device is capable of auto-downgrading its link to PCIe Gen4 + speed, with a boolean output value of ``0`` or ``1``, meaning `incapable` or + `capable` respectively. + * + * .. code-block:: shell + * + * $ cat /sys/bus/pci/devices/<bdf>/auto_link_downgrade_capable + * + * Pushing a firmware image with PCIe Gen5 as the default configuration on an auto + link downgrade incapable device that then faces link instability due to host or + motherboard limitations can result in the driver failing to bind to the device, + making further firmware updates impossible, with an RMA as the only remaining + resort. + * + * The link downgrade status of auto link downgrade capable devices is available + through the ``auto_link_downgrade_status`` sysfs entry, with a boolean output value + of ``0`` or ``1``, where ``0`` means no auto-downgrading was required during + link training (which is the optimal scenario) and ``1`` means the device has + auto-downgraded its link to PCIe Gen4 speed due to an unstable Gen5 link. + * + * .. code-block:: shell + * + * $ cat /sys/bus/pci/devices/<bdf>/auto_link_downgrade_status + */ + +static ssize_t +auto_link_downgrade_capable_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); + u32 cap, val; + + xe_pm_runtime_get(xe); + val = xe_mmio_read32(xe_root_tile_mmio(xe), BMG_PCIE_CAP); + xe_pm_runtime_put(xe); + + cap = REG_FIELD_GET(LINK_DOWNGRADE, val); + return sysfs_emit(buf, "%u\n", cap == DOWNGRADE_CAPABLE ? 
true : false); +} +static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_capable); + +static ssize_t +auto_link_downgrade_status_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); + u32 val; + int ret; + + xe_pm_runtime_get(xe); + ret = xe_pcode_read(xe_device_get_root_tile(xe), + PCODE_MBOX(DGFX_PCODE_STATUS, DGFX_GET_INIT_STATUS, 0), + &val, NULL); + xe_pm_runtime_put(xe); + + return ret ?: sysfs_emit(buf, "%u\n", REG_FIELD_GET(DGFX_LINK_DOWNGRADE_STATUS, val)); +} +static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_status); + +static const struct attribute *auto_link_downgrade_attrs[] = { + &dev_attr_auto_link_downgrade_capable.attr, + &dev_attr_auto_link_downgrade_status.attr, + NULL +}; + static void xe_device_sysfs_fini(void *arg) { struct xe_device *xe = arg; - sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr); + if (xe->d3cold.capable) + sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr); + + if (xe->info.platform == XE_BATTLEMAGE) + sysfs_remove_files(&xe->drm.dev->kobj, auto_link_downgrade_attrs); } int xe_device_sysfs_init(struct xe_device *xe) @@ -75,9 +159,17 @@ int xe_device_sysfs_init(struct xe_device *xe) struct device *dev = xe->drm.dev; int ret; - ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr); - if (ret) - return ret; + if (xe->d3cold.capable) { + ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr); + if (ret) + return ret; + } + + if (xe->info.platform == XE_BATTLEMAGE) { + ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs); + if (ret) + return ret; + } return devm_add_action_or_reset(dev, xe_device_sysfs_fini, xe); } diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 495bc00ebed4..06c65dace026 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -31,7 +31,6 @@ #endif #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) -#include "soc/intel_pch.h" #include "intel_display_core.h" #include "intel_display_device.h" #endif @@ -589,7 +588,6 @@ struct xe_device { * migrating to the right sub-structs */ struct intel_display display; - enum intel_pch pch_type; struct dram_info { bool wm_lv_0_adjust_needed; diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index e2bb156c71fb..96732613b4b7 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -283,7 +283,7 @@ static int xe_eu_stall_user_ext_set_property(struct xe_device *xe, u64 extension int err; u32 idx; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; @@ -313,7 +313,7 @@ static int xe_eu_stall_user_extensions(struct xe_device *xe, u64 extension, if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) return -E2BIG; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index b75adfc99fb7..44364c042ad7 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -176,8 +176,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } if (xe_exec_queue_is_parallel(q)) { - err = __copy_from_user(addresses, addresses_user, sizeof(u64) * - q->width); + err = 
copy_from_user(addresses, addresses_user, sizeof(u64) * + q->width); if (err) { err = -EFAULT; goto err_syncs; diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 606922d9dd73..21d4ced31dd9 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -479,7 +479,7 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, int err; u32 idx; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; @@ -518,7 +518,7 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) return -E2BIG; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; @@ -618,9 +618,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE)) return -EINVAL; - err = __copy_from_user(eci, user_eci, - sizeof(struct drm_xe_engine_class_instance) * - len); + err = copy_from_user(eci, user_eci, + sizeof(struct drm_xe_engine_class_instance) * len); if (XE_IOCTL_DBG(xe, err)) return -EFAULT; diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index fd41113f8572..0bcf97063ff6 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -555,6 +555,28 @@ void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc) flush_work(&gsc->work); } +void xe_gsc_stop_prepare(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + int ret; + + if (!xe_uc_fw_is_loadable(&gsc->fw) || xe_uc_fw_is_in_error_state(&gsc->fw)) + return; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GSC); + + /* + * If the GSC FW load or the proxy init are interrupted, the only way + * to recover it is to do an FLR and reload the GSC from scratch. + * Therefore, let's wait for the init to complete before stopping + * operations. 
The proxy init is the last step, so we can just wait on + that. + */ + ret = xe_gsc_wait_for_proxy_init_done(gsc); + if (ret) + xe_gt_err(gt, "failed to wait for GSC init completion before uc stop\n"); +} + /* * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a * GSC engine reset by writing a notification bit in the GS1 register and then diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h index d99f66c38075..b8b8e0810ad9 100644 --- a/drivers/gpu/drm/xe/xe_gsc.h +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -16,6 +16,7 @@ struct xe_hw_engine; int xe_gsc_init(struct xe_gsc *gsc); int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc); void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc); +void xe_gsc_stop_prepare(struct xe_gsc *gsc); void xe_gsc_load_start(struct xe_gsc *gsc); void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 8cf70b228ff3..d0519cd6704a 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -71,6 +71,17 @@ bool xe_gsc_proxy_init_done(struct xe_gsc *gsc) HECI1_FWSTS1_PROXY_STATE_NORMAL; } +int xe_gsc_wait_for_proxy_init_done(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + + /* Proxy init can take up to 500ms, so wait double that for safety */ + return xe_mmio_wait32(&gt->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE), + HECI1_FWSTS1_CURRENT_STATE, + HECI1_FWSTS1_PROXY_STATE_NORMAL, + USEC_PER_SEC, NULL, false); +} + static void __gsc_proxy_irq_rmw(struct xe_gsc *gsc, u32 clr, u32 set) { struct xe_gt *gt = gsc_to_gt(gsc); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.h b/drivers/gpu/drm/xe/xe_gsc_proxy.h index fdef56995cd4..765602221dbc 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.h +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.h @@ -12,6 +12,7 @@ struct xe_gsc; int xe_gsc_proxy_init(struct xe_gsc *gsc); bool xe_gsc_proxy_init_done(struct xe_gsc *gsc); +int xe_gsc_wait_for_proxy_init_done(struct xe_gsc *gsc); int xe_gsc_proxy_start(struct xe_gsc *gsc); int xe_gsc_proxy_request_handler(struct xe_gsc *gsc); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 8068b4bc0a09..0e5d243c9451 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -899,7 +899,7 @@ void xe_gt_suspend_prepare(struct xe_gt *gt) fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_uc_stop_prepare(&gt->uc); + xe_uc_suspend_prepare(&gt->uc); xe_force_wake_put(gt_to_fw(gt), fw_ref); } diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index a88076e9cc7d..119a55bb7580 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -92,22 +92,23 @@ static int hw_engines(struct xe_gt *gt, struct drm_printer *p) struct xe_hw_engine *hwe; enum xe_hw_engine_id id; unsigned int fw_ref; + int ret = 0; xe_pm_runtime_get(xe); fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { - xe_pm_runtime_put(xe); - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto fw_put; } for_each_hw_engine(hwe, gt, id) xe_hw_engine_print(hwe, p); +fw_put: xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_pm_runtime_put(xe); - return 0; + return ret; } static int powergate_info(struct xe_gt *gt, struct drm_printer *p) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index c5aace59b62c..bac5471a1a78 100644 --- a/drivers/gpu/drm/xe/xe_guc.c 
+++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1510,30 +1510,32 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) xe_uc_fw_print(&guc->fw, p); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) - return; + if (!IS_SRIOV_VF(gt_to_xe(gt))) { + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return; + + status = xe_mmio_read32(&gt->mmio, GUC_STATUS); + + drm_printf(p, "\nGuC status 0x%08x:\n", status); + drm_printf(p, "\tBootrom status = 0x%x\n", + REG_FIELD_GET(GS_BOOTROM_MASK, status)); + drm_printf(p, "\tuKernel status = 0x%x\n", + REG_FIELD_GET(GS_UKERNEL_MASK, status)); + drm_printf(p, "\tMIA Core status = 0x%x\n", + REG_FIELD_GET(GS_MIA_MASK, status)); + drm_printf(p, "\tLog level = %d\n", + xe_guc_log_get_level(&guc->log)); + + drm_puts(p, "\nScratch registers:\n"); + for (i = 0; i < SOFT_SCRATCH_COUNT; i++) { + drm_printf(p, "\t%2d: \t0x%x\n", + i, xe_mmio_read32(&gt->mmio, SOFT_SCRATCH(i))); + } - status = xe_mmio_read32(&gt->mmio, GUC_STATUS); - - drm_printf(p, "\nGuC status 0x%08x:\n", status); - drm_printf(p, "\tBootrom status = 0x%x\n", - REG_FIELD_GET(GS_BOOTROM_MASK, status)); - drm_printf(p, "\tuKernel status = 0x%x\n", - REG_FIELD_GET(GS_UKERNEL_MASK, status)); - drm_printf(p, "\tMIA Core status = 0x%x\n", - REG_FIELD_GET(GS_MIA_MASK, status)); - drm_printf(p, "\tLog level = %d\n", - xe_guc_log_get_level(&guc->log)); - - drm_puts(p, "\nScratch registers:\n"); - for (i = 0; i < SOFT_SCRATCH_COUNT; i++) { - drm_printf(p, "\t%2d: \t0x%x\n", - i, xe_mmio_read32(&gt->mmio, SOFT_SCRATCH(i))); + xe_force_wake_put(gt_to_fw(gt), fw_ref); } - xe_force_wake_put(gt_to_fw(gt), fw_ref); - drm_puts(p, "\n"); xe_guc_ct_print(&guc->ct, p, false); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 813c3c0bb250..369be36f7dc5 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1179,9 +1179,12 @@ trigger_reset: process_name = q->vm->xef->process_name; pid = q->vm->xef->pid; } - xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]", - xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), - q->guc->id, q->flags, process_name, pid); + + if (!exec_queue_killed(q)) + xe_gt_notice(guc_to_gt(guc), + "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]", + xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), + q->guc->id, q->flags, process_name, pid); trace_xe_sched_job_timedout(job); diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 346f357b3d1f..fb842fa0552e 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1301,7 +1301,7 @@ static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_fr int err; u32 idx; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(oa->xe, err)) return -EFAULT; @@ -1338,7 +1338,7 @@ static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from fro if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS)) return -E2BIG; - err = __copy_from_user(&ext, address, sizeof(ext)); + err = copy_from_user(&ext, address, sizeof(ext)); if (XE_IOCTL_DBG(oa->xe, err)) return -EFAULT; @@ -2281,7 +2281,7 @@ int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *fi return -EACCES; } - err = __copy_from_user(&param, u64_to_user_ptr(data), sizeof(param)); + err = copy_from_user(&param, u64_to_user_ptr(data), sizeof(param)); if 
(XE_IOCTL_DBG(oa->xe, err)) return -EFAULT; diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index e622ae17f08d..127d4d26c4cf 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -34,6 +34,7 @@ #define DGFX_PCODE_STATUS 0x7E #define DGFX_GET_INIT_STATUS 0x0 #define DGFX_INIT_STATUS_COMPLETE 0x1 +#define DGFX_LINK_DOWNGRADE_STATUS REG_BIT(31) #define PCODE_POWER_SETUP 0x7C #define POWER_SETUP_SUBCOMMAND_READ_I1 0x4 @@ -66,6 +67,10 @@ /* Auxiliary info bits */ #define AUXINFO_HISTORY_OFFSET REG_GENMASK(31, 29) +#define BMG_PCIE_CAP XE_REG(0x138340) +#define LINK_DOWNGRADE REG_GENMASK(1, 0) +#define DOWNGRADE_CAPABLE 2 + struct pcode_err_decode { int errno; const char *str; diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 38514cef817e..693866def183 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -16,7 +16,6 @@ #include "xe_bo.h" #include "xe_bo_evict.h" #include "xe_device.h" -#include "xe_device_sysfs.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_guc.h" @@ -273,6 +272,7 @@ int xe_pm_init_early(struct xe_device *xe) if (err) return err; + xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe); return 0; } ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */ @@ -344,13 +344,7 @@ int xe_pm_init(struct xe_device *xe) if (!xe_device_uc_enabled(xe)) return 0; - xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe); - if (xe->d3cold.capable) { - err = xe_device_sysfs_init(xe); - if (err) - goto err_unregister; - vram_threshold = vram_threshold_value(xe); err = xe_pm_set_vram_threshold(xe, vram_threshold); if (err) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 890f6b2f40e9..d25f02c8d7fc 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -726,7 +726,6 @@ static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, } #endif - /** * xe_svm_handle_pagefault() - SVM handle page fault * @vm: The VM. @@ -966,3 +965,15 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) return 0; } #endif + +/** + * xe_svm_flush() - SVM flush + * @vm: The VM. + * + * Flush all SVM actions. 
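+ *
+ * A minimal usage sketch (illustrative only), mirroring how the VM bind path in
+ * this series uses it to make pending UNMAPs visible before taking the VM lock:
+ *
+ *    xe_svm_flush(vm);
+ *    err = down_write_killable(&vm->lock);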
+ */ +void xe_svm_flush(struct xe_vm *vm) +{ + if (xe_vm_in_fault_mode(vm)) + flush_work(&vm->svm.garbage_collector.work); +} diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index 3d441eb1f7ea..2881af1e60b2 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -96,6 +96,8 @@ static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range) #define xe_svm_notifier_unlock(vm__) \ drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm) +void xe_svm_flush(struct xe_vm *vm); + #else #include <linux/interval_tree.h> @@ -180,5 +182,9 @@ static inline void xe_svm_notifier_lock(struct xe_vm *vm) static inline void xe_svm_notifier_unlock(struct xe_vm *vm) { } + +static inline void xe_svm_flush(struct xe_vm *vm) +{ +} #endif #endif diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index c14bd2282044..3a8751a8b92d 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -244,7 +244,7 @@ void xe_uc_gucrc_disable(struct xe_uc *uc) void xe_uc_stop_prepare(struct xe_uc *uc) { - xe_gsc_wait_for_worker_completion(&uc->gsc); + xe_gsc_stop_prepare(&uc->gsc); xe_guc_stop_prepare(&uc->guc); } @@ -278,6 +278,12 @@ again: goto again; } +void xe_uc_suspend_prepare(struct xe_uc *uc) +{ + xe_gsc_wait_for_worker_completion(&uc->gsc); + xe_guc_stop_prepare(&uc->guc); +} + int xe_uc_suspend(struct xe_uc *uc) { /* GuC submission not enabled, nothing to do */ diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 3813c1ede450..c23e6f5e2514 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -18,6 +18,7 @@ int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); void xe_uc_stop(struct xe_uc *uc); int xe_uc_start(struct xe_uc *uc); +void xe_uc_suspend_prepare(struct xe_uc *uc); int xe_uc_suspend(struct xe_uc *uc); int xe_uc_sanitize_reset(struct xe_uc *uc); void xe_uc_declare_wedged(struct xe_uc *uc); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 80e56e232685..79323c78130f 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3101,9 +3101,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, if (!*bind_ops) return args->num_binds > 1 ? 
-ENOBUFS : -ENOMEM; - err = __copy_from_user(*bind_ops, bind_user, - sizeof(struct drm_xe_vm_bind_op) * - args->num_binds); + err = copy_from_user(*bind_ops, bind_user, + sizeof(struct drm_xe_vm_bind_op) * + args->num_binds); if (XE_IOCTL_DBG(xe, err)) { err = -EFAULT; goto free_bind_ops; @@ -3331,8 +3331,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } /* Ensure all UNMAPs visible */ - if (xe_vm_in_fault_mode(vm)) - flush_work(&vm->svm.garbage_collector.work); + xe_svm_flush(vm); err = down_write_killable(&vm->lock); if (err) diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index d9614e2c8939..7b19192c7031 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -971,5 +971,7 @@ int drm_dp_bw_channel_coding_efficiency(bool is_uhbr); int drm_dp_max_dprx_data_rate(int max_link_rate, int max_lanes); ssize_t drm_dp_vsc_sdp_pack(const struct drm_dp_vsc_sdp *vsc, struct dp_sdp *sdp); +int drm_dp_link_symbol_cycles(int lane_count, int pixels, int dsc_slice_count, + int bpp_x16, int symbol_size, bool is_mst); #endif /* _DRM_DP_HELPER_H_ */ diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index 94d365b22505..5c3b2aa3e69d 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -446,6 +446,9 @@ static inline bool drm_is_accel_client(const struct drm_file *file_priv) return file_priv->minor->type == DRM_MINOR_ACCEL; } +__printf(2, 3) +void drm_file_err(struct drm_file *file_priv, const char *fmt, ...); + void drm_file_update_pid(struct drm_file *); struct drm_minor *drm_minor_acquire(struct xarray *minors_xa, unsigned int minor_id); diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h index 66b1e56fbb81..62df222fe0f1 100644 --- a/include/linux/dma-fence-unwrap.h +++ b/include/linux/dma-fence-unwrap.h @@ -52,6 +52,8 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, struct dma_fence **fences, struct dma_fence_unwrap *cursors); +int dma_fence_dedup_array(struct dma_fence **array, int num_fences); + /** * dma_fence_unwrap_merge - unwrap and merge fences * diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 25d5c6e90a99..1fd96474e64c 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -54,6 +54,9 @@ extern "C" { #define DRM_AMDGPU_VM 0x13 #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14 #define DRM_AMDGPU_SCHED 0x15 +#define DRM_AMDGPU_USERQ 0x16 +#define DRM_AMDGPU_USERQ_SIGNAL 0x17 +#define DRM_AMDGPU_USERQ_WAIT 0x18 #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) @@ -71,6 +74,9 @@ extern "C" { #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle) #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq) +#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal) +#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait) /** * DOC: memory domains @@ -319,6 +325,260 @@ 
union drm_amdgpu_ctx { union drm_amdgpu_ctx_out out; }; +/* user queue IOCTL operations */ +#define AMDGPU_USERQ_OP_CREATE 1 +#define AMDGPU_USERQ_OP_FREE 2 + +/* queue priority levels */ +/* low < normal low < normal high < high */ +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK 0x3 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT 0 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW 0 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW 1 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH 2 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */ +/* for queues that need access to protected content */ +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE (1 << 2) + +/* + * This structure is a container to pass input configuration + * info for all supported userqueue related operations. + * For operation AMDGPU_USERQ_OP_CREATE: the user is expected + * to set all fields, except the parameter 'queue_id'. + * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected + * to be set is 'queue_id', everything else is ignored. + */ +struct drm_amdgpu_userq_in { + /** AMDGPU_USERQ_OP_* */ + __u32 op; + /** Queue id passed for operation USERQ_OP_FREE */ + __u32 queue_id; + /** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */ + __u32 ip_type; + /** + * @doorbell_handle: the handle of doorbell GEM object + * associated with this userqueue client. + */ + __u32 doorbell_handle; + /** + * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo. + * Kernel will generate absolute doorbell offset using doorbell_handle + * and doorbell_offset in the doorbell bo. + */ + __u32 doorbell_offset; + /** + * @flags: flags used for queue parameters + */ + __u32 flags; + /** + * @queue_va: Virtual address of the GPU memory which holds the queue + * object. The queue holds the workload packets. + */ + __u64 queue_va; + /** + * @queue_size: Size of the queue in bytes, this needs to be 256-byte + * aligned. + */ + __u64 queue_size; + /** + * @rptr_va : Virtual address of the GPU memory which holds the ring RPTR. + * This object must be at least 8 bytes in size and aligned to an 8-byte offset. + */ + __u64 rptr_va; + /** + * @wptr_va : Virtual address of the GPU memory which holds the ring WPTR. + * This object must be at least 8 bytes in size and aligned to an 8-byte offset. + * + * Queue, RPTR and WPTR can come from the same object, as long as the size + * and alignment related requirements are met. + */ + __u64 wptr_va; + /** + * @mqd: MQD (memory queue descriptor) is a set of parameters which allow + * the GPU to uniquely define and identify a usermode queue. + * + * MQD data can be of different size for different GPU IP/engine and + * their respective versions/revisions, so this points to a __u64 * + * which holds IP specific MQD of this usermode queue. + */ + __u64 mqd; + /** + * @mqd_size: size of MQD data in bytes, it must match the MQD structure + * size of the respective engine/revision defined in UAPI; for example, for + * gfx11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx11). + */ + __u64 mqd_size; +}; + +/* The structure to carry output of userqueue ops */ +struct drm_amdgpu_userq_out { + /** + * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique + * queue ID to represent the newly created userqueue in the system, otherwise + * it should be ignored. 
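+ *
+ * As a rough sketch of the create path (illustrative only; fd is assumed to be
+ * an open render node, and the doorbell handle, GPU virtual addresses and the
+ * gfx11 MQD are assumed to have been prepared by the application beforehand):
+ *
+ *    union drm_amdgpu_userq uq = {};
+ *
+ *    uq.in.op = AMDGPU_USERQ_OP_CREATE;
+ *    uq.in.ip_type = AMDGPU_HW_IP_GFX;
+ *    uq.in.flags = AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW;
+ *    uq.in.doorbell_handle = doorbell_handle;
+ *    uq.in.doorbell_offset = 0;
+ *    uq.in.queue_va = queue_va;
+ *    uq.in.queue_size = queue_size;
+ *    uq.in.rptr_va = rptr_va;
+ *    uq.in.wptr_va = wptr_va;
+ *    uq.in.mqd = (__u64)(uintptr_t)&mqd_gfx11;
+ *    uq.in.mqd_size = sizeof(mqd_gfx11);
+ *
+ *    if (drmIoctl(fd, DRM_IOCTL_AMDGPU_USERQ, &uq) == 0)
+ *        queue_id = uq.out.queue_id;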
+ */ + __u32 queue_id; + __u32 _pad; +}; + +union drm_amdgpu_userq { + struct drm_amdgpu_userq_in in; + struct drm_amdgpu_userq_out out; +}; + +/* GFX V11 IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_gfx11 { + /** + * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 shadow_va; + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 csa_va; +}; + +/* GFX V11 SDMA IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_sdma_gfx11 { + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * This must be from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. + */ + __u64 csa_va; +}; + +/* GFX V11 Compute IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_compute_gfx11 { + /** + * @eop_va: Virtual address of the GPU memory to hold the EOP buffer. + * This must be from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. + */ + __u64 eop_va; +}; + +/* userq signal/wait ioctl */ +struct drm_amdgpu_userq_signal { + /** + * @queue_id: Queue handle used by the userq fence creation function + * to retrieve the WPTR. + */ + __u32 queue_id; + __u32 pad; + /** + * @syncobj_handles: The list of syncobj handles submitted by the user queue + * job to be signaled. + */ + __u64 syncobj_handles; + /** + * @num_syncobj_handles: A count that represents the number of syncobj handles in + * @syncobj_handles. + */ + __u64 num_syncobj_handles; + /** + * @bo_read_handles: The list of BO handles that the submitted user queue job + * is using for read only. This will update BO fences in the kernel. + */ + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of BO handles that the submitted user queue job + * is using for write only. This will update BO fences in the kernel. + */ + __u64 bo_write_handles; + /** + * @num_bo_read_handles: A count that represents the number of read BO handles in + * @bo_read_handles. + */ + __u32 num_bo_read_handles; + /** + * @num_bo_write_handles: A count that represents the number of write BO handles in + * @bo_write_handles. + */ + __u32 num_bo_write_handles; +}; + +struct drm_amdgpu_userq_fence_info { + /** + * @va: A GPU address allocated for each queue which stores the + * read pointer (RPTR) value. + */ + __u64 va; + /** + * @value: A 64-bit value that represents the write pointer (WPTR) of the + * queue commands, which is compared with the RPTR value to signal the + * fences. + */ + __u64 value; +}; + +struct drm_amdgpu_userq_wait { + /** + * @waitq_id: Queue handle used by the userq wait IOCTL to retrieve the + * wait queue and maintain the fence driver references in it. + */ + __u32 waitq_id; + __u32 pad; + /** + * @syncobj_handles: The list of syncobj handles submitted by the user queue + * job to get the va/value pairs. + */ + __u64 syncobj_handles; + /** + * @syncobj_timeline_handles: The list of timeline syncobj handles submitted by + * the user queue job to get the va/value pairs at given @syncobj_timeline_points. + */ + __u64 syncobj_timeline_handles; + /** + * @syncobj_timeline_points: The list of timeline syncobj points submitted by the + * user queue job for the corresponding @syncobj_timeline_handles. + */ + __u64 syncobj_timeline_points; + /** + * @bo_read_handles: The list of read BO handles submitted by the user queue + * job to get the va/value pairs. 
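+ *
+ * A rough wait sketch (illustrative only; fd, the waitq handle and the handle
+ * arrays are assumed to have been set up by the application, and out_fences
+ * simply points at a local array of struct drm_amdgpu_userq_fence_info):
+ *
+ *    struct drm_amdgpu_userq_fence_info fences[8];
+ *    struct drm_amdgpu_userq_wait wait = {};
+ *
+ *    wait.waitq_id = waitq_id;
+ *    wait.syncobj_handles = (__u64)(uintptr_t)syncobj_handles;
+ *    wait.num_syncobj_handles = num_syncobj_handles;
+ *    wait.bo_read_handles = (__u64)(uintptr_t)read_handles;
+ *    wait.num_bo_read_handles = num_read_handles;
+ *    wait.out_fences = (__u64)(uintptr_t)fences;
+ *    wait.num_fences = 8;
+ *
+ *    if (drmIoctl(fd, DRM_IOCTL_AMDGPU_USERQ_WAIT, &wait) == 0)
+ *        num_returned = wait.num_fences;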
+ */ + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of write BO handles submitted by the user queue + * job to get the va/value pairs. + */ + __u64 bo_write_handles; + /** + * @num_syncobj_timeline_handles: A count that represents the number of timeline + * syncobj handles in @syncobj_timeline_handles. + */ + __u16 num_syncobj_timeline_handles; + /** + * @num_fences: This field can be used both as input and output. As input it defines + * the maximum number of fences that can be returned and as output it will specify + * how many fences were actually returned from the ioctl. + */ + __u16 num_fences; + /** + * @num_syncobj_handles: A count that represents the number of syncobj handles in + * @syncobj_handles. + */ + __u32 num_syncobj_handles; + /** + * @num_bo_read_handles: A count that represents the number of read BO handles in + * @bo_read_handles. + */ + __u32 num_bo_read_handles; + /** + * @num_bo_write_handles: A count that represents the number of write BO handles in + * @bo_write_handles. + */ + __u32 num_bo_write_handles; + /** + * @out_fences: The field is a return value from the ioctl containing the list of + * address/value pairs to wait for. + */ + __u64 out_fences; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 @@ -599,6 +859,19 @@ struct drm_amdgpu_gem_va { __u64 offset_in_bo; /** Specify mapping size. Must be correctly aligned. */ __u64 map_size; + /** + * vm_timeline_point is a sequence number used to add new timeline point. + */ + __u64 vm_timeline_point; + /** + * The vm page table update fence is installed in given vm_timeline_syncobj_out + * at vm_timeline_point. + */ + __u32 vm_timeline_syncobj_out; + /** the number of syncobj handles in @input_fence_syncobj_handles */ + __u32 num_syncobj_handles; + /** Array of sync object handle to wait for given input fences */ + __u64 input_fence_syncobj_handles; }; #define AMDGPU_HW_IP_GFX 0 @@ -940,6 +1213,8 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow { #define AMDGPU_INFO_MAX_IBS 0x22 /* query last page fault info */ #define AMDGPU_INFO_GPUVM_FAULT 0x23 +/* query FW object size and alignment */ +#define AMDGPU_INFO_UQ_FW_AREAS 0x24 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff @@ -1198,6 +1473,9 @@ struct drm_amdgpu_info_device { __u32 csa_size; /* context save area base virtual alignment for gfx11 */ __u32 csa_alignment; + /* Userq IP mask (1 << AMDGPU_HW_IP_*) */ + __u32 userq_ip_mask; + __u32 pad; }; struct drm_amdgpu_info_hw_ip { @@ -1216,6 +1494,27 @@ struct drm_amdgpu_info_hw_ip { __u32 ip_discovery_version; }; +/* GFX metadata BO sizes and alignment info (in bytes) */ +struct drm_amdgpu_info_uq_fw_areas_gfx { + /* shadow area size */ + __u32 shadow_size; + /* shadow area base virtual mem alignment */ + __u32 shadow_alignment; + /* context save area size */ + __u32 csa_size; + /* context save area base virtual mem alignment */ + __u32 csa_alignment; +}; + +/* IP specific fw related information used in the + * subquery AMDGPU_INFO_UQ_FW_AREAS + */ +struct drm_amdgpu_info_uq_fw_areas { + union { + struct drm_amdgpu_info_uq_fw_areas_gfx gfx; + }; +}; + struct drm_amdgpu_info_num_handles { /** Max handles as supported by firmware for UVD */ __u32 uvd_max_handles; @@ -1279,6 +1578,23 @@ struct drm_amdgpu_info_gpuvm_fault { __u32 vmhub; }; +struct drm_amdgpu_info_uq_metadata_gfx { + /* shadow area size for gfx11 */ + __u32 shadow_size; + /* shadow area base virtual alignment for gfx11 */ + __u32 shadow_alignment; + /* context 
save area size for gfx11 */ + __u32 csa_size; + /* context save area base virtual alignment for gfx11 */ + __u32 csa_alignment; +}; + +struct drm_amdgpu_info_uq_metadata { + union { + struct drm_amdgpu_info_uq_metadata_gfx gfx; + }; +}; + /* * Supported GPU families */ diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 1e59344c5673..04c7d283dc7d 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -536,6 +536,8 @@ enum kfd_smi_event { KFD_SMI_EVENT_QUEUE_EVICTION = 9, KFD_SMI_EVENT_QUEUE_RESTORE = 10, KFD_SMI_EVENT_UNMAP_FROM_GPU = 11, + KFD_SMI_EVENT_PROCESS_START = 12, + KFD_SMI_EVENT_PROCESS_END = 13, /* * max event number, as a flag bit to get events from all processes, @@ -651,6 +653,9 @@ struct kfd_ioctl_smi_events_args { "%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\ (node), (unmap_trigger) +#define KFD_EVENT_FMT_PROCESS(pid, task_name)\ + "%x %s\n", (pid), (task_name) + /************************************************************************************************** * CRIU IOCTLs (Checkpoint Restore In Userspace) * |